# Patch summary (two files).
#
# 1. string_run_evaluator.py, class StringRunEvaluatorChain: the bodies of two
#    evaluation methods are wrapped in try/except Exception. The first
#    method's `def` line sits above the hunk (presumably `evaluate_run`, which
#    is what the new test calls; confirm against the full file); the second is
#    `aevaluate_run`. On any Exception each now returns an EvaluationResult
#    with key=self.string_evaluator.evaluation_name and
#    comment=f"Error evaluating run {run.id}: {e}" instead of letting the
#    exception propagate to the caller.
# 2. test_string_run_evaluator.py (new file): builds a StringRunEvaluatorChain
#    around a CriteriaEvalChain created from FakeLLM, calls evaluate_run with
#    MagicMock run/example objects, and asserts that the result's comment
#    starts with "Error evaluating run " and that its key equals the string
#    evaluator's evaluation_name.
#
# NOTE(review): the except branches neither log nor re-raise, so the only
#   trace of a failure is the comment string on the returned result.
# NOTE(review): only the synchronous path is exercised by the new test; the
#   aevaluate_run error branch has no test in this patch.
# NOTE(review): the TODO about the run ID appears only in the first except
#   branch, not in the aevaluate_run one.
diff --git a/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py b/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py
index 0319433808..d8133b19d8 100644
--- a/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py
+++ b/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py
@@ -313,17 +313,30 @@ class StringRunEvaluatorChain(Chain, RunEvaluator):
         self, run: Run, example: Optional[Example] = None
     ) -> EvaluationResult:
         """Evaluate an example."""
-        result = self({"run": run, "example": example}, include_run_info=True)
-        return self._prepare_evaluator_output(result)
+        try:
+            result = self({"run": run, "example": example}, include_run_info=True)
+            return self._prepare_evaluator_output(result)
+        except Exception as e:
+            return EvaluationResult(
+                key=self.string_evaluator.evaluation_name,
+                comment=f"Error evaluating run {run.id}: {e}",
+                # TODO: Add run ID once we can declare it via callbacks
+            )
 
     async def aevaluate_run(
         self, run: Run, example: Optional[Example] = None
     ) -> EvaluationResult:
         """Evaluate an example."""
-        result = await self.acall(
-            {"run": run, "example": example}, include_run_info=True
-        )
-        return self._prepare_evaluator_output(result)
+        try:
+            result = await self.acall(
+                {"run": run, "example": example}, include_run_info=True
+            )
+            return self._prepare_evaluator_output(result)
+        except Exception as e:
+            return EvaluationResult(
+                key=self.string_evaluator.evaluation_name,
+                comment=f"Error evaluating run {run.id}: {e}",
+            )
 
     @classmethod
     def from_run_and_data_type(
diff --git a/libs/langchain/tests/unit_tests/smith/evaluation/test_string_run_evaluator.py b/libs/langchain/tests/unit_tests/smith/evaluation/test_string_run_evaluator.py
new file mode 100644
index 0000000000..7f9b103d33
--- /dev/null
+++ b/libs/langchain/tests/unit_tests/smith/evaluation/test_string_run_evaluator.py
@@ -0,0 +1,27 @@
+"""Tests for the string run evaluator."""
+
+from unittest.mock import MagicMock
+
+from langchain.evaluation import criteria
+from langchain.smith.evaluation.string_run_evaluator import (
+    ChainStringRunMapper,
+    StringRunEvaluatorChain,
+)
+from tests.unit_tests.llms import fake_llm
+
+
+def test_evaluate_run() -> None:
+    run_mapper = ChainStringRunMapper()
+    example_mapper = MagicMock()
+    string_evaluator = criteria.CriteriaEvalChain.from_llm(fake_llm.FakeLLM())
+    evaluator = StringRunEvaluatorChain(
+        run_mapper=run_mapper,
+        example_mapper=example_mapper,
+        name="test_evaluator",
+        string_evaluator=string_evaluator,
+    )
+    run = MagicMock()
+    example = MagicMock()
+    res = evaluator.evaluate_run(run, example)
+    assert res.comment.startswith("Error evaluating run ")
+    assert res.key == string_evaluator.evaluation_name