Unverified Commit 54d8a808 authored by Adam Schiller, committed by GitHub

fix bug in BatchEvalRunner for multi-evaluator eval_kwargs_lists (#12418)

parent f6d81279
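For context, a minimal usage sketch of the calling convention this fix targets. The evaluator classes, names, queries, and reference values below are made up for illustration, and the import path assumes the llama_index.core package layout; only BatchEvalRunner and its evaluate_response_strs signature come from the code changed in this diff.

from llama_index.core.evaluation import (
    BatchEvalRunner,
    CorrectnessEvaluator,
    FaithfulnessEvaluator,
)

# Hypothetical setup: only "correctness" takes per-query runtime kwargs.
runner = BatchEvalRunner(
    evaluators={
        "faithfulness": FaithfulnessEvaluator(),  # no runtime kwargs
        "correctness": CorrectnessEvaluator(),    # needs a per-query reference
    }
)

# Nested, per-evaluator kwargs: keys are evaluator names, values map a kwarg
# name to one value per query.
results = runner.evaluate_response_strs(
    queries=["q1", "q2"],
    response_strs=["r1", "r2"],
    contexts_list=[["ctx for q1"], ["ctx for q2"]],
    correctness={"reference": ["expected answer 1", "expected answer 2"]},
)

# Before this change, an evaluator with no entry in the nested kwargs (here
# "faithfulness") fell through to the single-evaluator branch and was handed
# the whole nested dict; with this fix it receives an empty kwargs dict.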
@@ -206,15 +206,20 @@ class BatchEvalRunner:
         )
         eval_kwargs_lists = self._validate_nested_eval_kwargs_types(eval_kwargs_lists)
+        # boolean to check if using multi kwarg evaluator
+        multi_kwargs = isinstance(next(iter(eval_kwargs_lists.values())), dict)
         # run evaluations
         eval_jobs = []
         for idx, query in enumerate(cast(List[str], queries)):
             response_str = cast(List, response_strs)[idx]
             contexts = cast(List, contexts_list)[idx]
             for name, evaluator in self.evaluators.items():
-                if name in eval_kwargs_lists:
-                    # multi-evaluator
-                    kwargs = eval_kwargs_lists[name]
+                if multi_kwargs:
+                    # multi-evaluator - get appropriate runtime kwargs if present
+                    kwargs = (
+                        eval_kwargs_lists[name] if name in eval_kwargs_lists else {}
+                    )
                 else:
                     # single evaluator (maintain backwards compatibility)
                     kwargs = eval_kwargs_lists
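A small sketch of what the new multi_kwargs check distinguishes; the dict contents here are illustrative, not taken from the repository.

# Flat form (single evaluator, kept for backwards compatibility): each value
# is a list with one entry per query.
flat = {"reference": ["ref1", "ref2"]}

# Nested form (multi-evaluator): each value is a dict keyed by kwarg name.
nested = {"correctness": {"reference": ["ref1", "ref2"]}}

def is_multi_kwargs(eval_kwargs_lists: dict) -> bool:
    # Mirrors the check added above: peek at the first value.
    return isinstance(next(iter(eval_kwargs_lists.values())), dict)

assert is_multi_kwargs(flat) is False
assert is_multi_kwargs(nested) is True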
@@ -259,14 +264,19 @@ class BatchEvalRunner:
         queries, responses = self._validate_and_clean_inputs(queries, responses)
         eval_kwargs_lists = self._validate_nested_eval_kwargs_types(eval_kwargs_lists)
+        # boolean to check if using multi kwarg evaluator
+        multi_kwargs = isinstance(next(iter(eval_kwargs_lists.values())), dict)
         # run evaluations
         eval_jobs = []
         for idx, query in enumerate(cast(List[str], queries)):
             response = cast(List, responses)[idx]
             for name, evaluator in self.evaluators.items():
-                if name in eval_kwargs_lists:
-                    # multi-evaluator
-                    kwargs = eval_kwargs_lists[name]
+                if multi_kwargs:
+                    # multi-evaluator - get appropriate runtime kwargs if present
+                    kwargs = (
+                        eval_kwargs_lists[name] if name in eval_kwargs_lists else {}
+                    )
                 else:
                     # single evaluator (maintain backwards compatibility)
                     kwargs = eval_kwargs_lists
@@ -385,7 +395,8 @@ class BatchEvalRunner:
         app_name: str,
         results: Dict[str, List[EvaluationResult]],
     ) -> None:
-        """Upload the evaluation results to LlamaCloud.
+        """
+        Upload the evaluation results to LlamaCloud.
 
         Args:
             project_name (str): The name of the project.
@@ -59,7 +59,12 @@ def get_eval_results(key, eval_results):
 @pytest.mark.asyncio()
 def test_batch_runner() -> None:
     # single evaluator
-    runner = BatchEvalRunner(evaluators={"evaluator1": MockEvaluator()})
+    runner = BatchEvalRunner(
+        evaluators={
+            "evaluator1": MockEvaluator(),
+            "no_kwarg_evaluator": MockEvaluator(),
+        }
+    )
     exp_queries = ["query1", "query2"]
     exp_response_strs = ["response1", "response2"]
@@ -86,6 +91,7 @@ def test_batch_runner() -> None:
     runner.evaluators = {
         "evaluator1": MockEvaluator(),
         "evaluator2": MockEvaluator(),
+        "no_kwarg_evaluator": MockEvaluator(),
     }
     exp_queries = ["query1", "query2"]