Add single run eval loader (#7390)

Plus:
- Add an evaluation name so the string and embedding evaluators work with
  the run evaluator loader.
- Remove an unused root validator.
pull/7392/head
William FH 1 year ago committed by GitHub
parent 633b673b85
commit 4ba7396f96
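
For context, a minimal sketch of how the new single-run loader is meant to be
called (the model and the dataset key are illustrative; any Chain, LLM, or
Tool can be passed):

    from langchain.chat_models import ChatOpenAI
    from langchain.evaluation.run_evaluators import load_run_evaluator_for_model
    from langchain.evaluation.schema import EvaluatorType

    # The model whose traced runs will be graded.
    llm = ChatOpenAI(temperature=0)

    # Build one RunEvaluator directly, instead of a one-element list.
    qa_evaluator = load_run_evaluator_for_model(
        EvaluatorType.QA,
        llm,
        reference_key="answer",  # assumed name of the dataset's label column
    )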

langchain/evaluation/embedding_distance/base.py

@@ -3,7 +3,7 @@ from enum import Enum
 from typing import Any, Dict, List, Optional
 
 import numpy as np
-from pydantic import Field, root_validator
+from pydantic import Field
 
 from langchain.callbacks.manager import (
     AsyncCallbackManagerForChainRun,
@@ -61,19 +61,6 @@ class _EmbeddingDistanceChainMixin(Chain):
         """
         return ["score"]
 
-    @root_validator
-    def _validate_distance_metric(cls, values: dict) -> dict:
-        """Validate the distance metric.
-
-        Args:
-            values (dict): The values to validate.
-
-        Returns:
-            dict: The validated values.
-        """
-        values["distance_metric"] = values["distance_metric"].lower()
-        return values
-
     def _get_metric(self, metric: EmbeddingDistance) -> Any:
         """Get the metric function for the given metric name.
@@ -194,6 +181,10 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
         """
         return True
 
+    @property
+    def evaluation_name(self) -> str:
+        return f"embedding_{self.distance_metric.value}_distance"
+
     @property
     def input_keys(self) -> List[str]:
         """Return the input keys of the chain.
@@ -219,9 +210,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
             Dict[str, Any]: The computed score.
         """
         vectors = np.array(
-            self.embeddings.embed_documents(
-                [inputs["prediction"], inputs["prediction_b"]]
-            )
+            self.embeddings.embed_documents([inputs["prediction"], inputs["reference"]])
         )
         score = self._compute_score(vectors)
         return {"score": score}
@@ -242,7 +231,7 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
             Dict[str, Any]: The computed score.
         """
         embedded = await self.embeddings.aembed_documents(
-            [inputs["prediction"], inputs["prediction_b"]]
+            [inputs["prediction"], inputs["reference"]]
         )
         vectors = np.array(embedded)
         score = self._compute_score(vectors)
@@ -324,6 +313,10 @@ class PairwiseEmbeddingDistanceEvalChain(
         """
         return ["prediction", "prediction_b"]
 
+    @property
+    def evaluation_name(self) -> str:
+        return f"pairwise_embedding_{self.distance_metric.value}_distance"
+
     def _call(
         self,
         inputs: Dict[str, Any],
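
A quick sketch of the new property and the corrected reference handling on
the non-pairwise chain (FakeEmbeddings stands in so nothing calls a real
embedding API, which also means the score here is meaningless):

    from langchain.embeddings.fake import FakeEmbeddings
    from langchain.evaluation.embedding_distance.base import (
        EmbeddingDistance,
        EmbeddingDistanceEvalChain,
    )

    chain = EmbeddingDistanceEvalChain(
        embeddings=FakeEmbeddings(size=8),
        distance_metric=EmbeddingDistance.COSINE,
    )
    print(chain.evaluation_name)  # embedding_cosine_distance

    # After this change the single-prediction chain embeds the prediction
    # against the *reference*, not a nonexistent "prediction_b" input.
    result = chain.evaluate_strings(prediction="hello", reference="hello world")
    print(result["score"])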

langchain/evaluation/run_evaluators/__init__.py

@@ -11,6 +11,10 @@ from langchain.evaluation.run_evaluators.implementations import (
     get_qa_evaluator,
     get_trajectory_evaluator,
 )
+from langchain.evaluation.run_evaluators.loading import (
+    load_run_evaluator_for_model,
+    load_run_evaluators_for_model,
+)
 from langchain.evaluation.run_evaluators.string_run_evaluator import (
     StringRunEvaluatorChain,
 )
@@ -25,4 +29,6 @@ __all__ = [
     "StringRunEvaluatorInputMapper",
     "ChoicesOutputParser",
     "StringRunEvaluatorChain",
+    "load_run_evaluators_for_model",
+    "load_run_evaluator_for_model",
 ]

langchain/evaluation/run_evaluators/loading.py

@@ -1,13 +1,11 @@
 """"Loading helpers for run evaluators."""
 from typing import Any, List, Optional, Sequence, Union
 
 from langchainplus_sdk import RunEvaluator
 
 from langchain.base_language import BaseLanguageModel
 from langchain.chains.base import Chain
-from langchain.evaluation.loading import load_evaluators
+from langchain.evaluation.loading import load_evaluator
 from langchain.evaluation.run_evaluators.string_run_evaluator import (
     StringRunEvaluatorChain,
 )
@@ -15,6 +13,55 @@ from langchain.evaluation.schema import EvaluatorType, StringEvaluator
 from langchain.tools.base import Tool
 
 
+def load_run_evaluator_for_model(
+    evaluator: EvaluatorType,
+    model: Union[Chain, BaseLanguageModel, Tool],
+    *,
+    input_key: Optional[str] = None,
+    prediction_key: Optional[str] = None,
+    reference_key: Optional[str] = None,
+    eval_llm: Optional[BaseLanguageModel] = None,
+    **kwargs: Any,
+) -> RunEvaluator:
+    """Load a run evaluator for the given evaluator type.
+
+    Parameters
+    ----------
+    evaluator : EvaluatorType
+        The evaluator type to load.
+    model : Union[Chain, BaseLanguageModel, Tool]
+        The model to evaluate. Used to infer how to parse the run.
+    input_key : Optional[str]
+        The chain run's input key to map to the evaluator's input.
+    prediction_key : Optional[str]
+        The key in the run's outputs that holds the chain's prediction.
+    reference_key : Optional[str]
+        The key in the dataset example (row) outputs that holds the
+        reference, or ground-truth, label.
+    eval_llm : Optional[BaseLanguageModel]
+        The language model to use for evaluation. If none is provided,
+        a default ChatOpenAI gpt-4 model will be used.
+    **kwargs : Any
+        Additional keyword arguments to pass to the evaluator.
+
+    Returns
+    -------
+    RunEvaluator
+        The loaded run evaluator.
+    """
+    evaluator_ = load_evaluator(evaluator, llm=eval_llm, **kwargs)
+    if isinstance(evaluator_, StringEvaluator):
+        run_evaluator = StringRunEvaluatorChain.from_model_and_evaluator(
+            model,
+            evaluator_,
+            input_key=input_key,
+            prediction_key=prediction_key,
+            reference_key=reference_key,
+        )
+    else:
+        raise NotImplementedError(f"Run evaluator for {evaluator} is not implemented")
+    return run_evaluator
+
+
 def load_run_evaluators_for_model(
     evaluators: Sequence[EvaluatorType],
     model: Union[Chain, BaseLanguageModel, Tool],
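
For a Chain model, the run's input and output dicts can hold several keys, so
the mapping can be pinned explicitly. A self-contained sketch using fake
components so nothing goes over the network (the key names "question"/"text"
must match the chain's actual keys; "answer" is a hypothetical dataset column):

    from langchain.chains import LLMChain
    from langchain.evaluation.run_evaluators import load_run_evaluator_for_model
    from langchain.evaluation.schema import EvaluatorType
    from langchain.llms.fake import FakeListLLM
    from langchain.prompts import PromptTemplate

    # Stand-in chain: its input key is "question", LLMChain's default
    # output key is "text".
    chain = LLMChain(
        llm=FakeListLLM(responses=["Paris"]),
        prompt=PromptTemplate.from_template("Answer: {question}"),
    )

    run_evaluator = load_run_evaluator_for_model(
        EvaluatorType.QA,
        chain,
        input_key="question",
        prediction_key="text",
        reference_key="answer",
        eval_llm=FakeListLLM(responses=["CORRECT"]),  # keeps the sketch offline
    )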
@@ -23,6 +70,7 @@ def load_run_evaluators_for_model(
     prediction_key: Optional[str] = None,
     reference_key: Optional[str] = None,
     eval_llm: Optional[BaseLanguageModel] = None,
+    config: Optional[dict] = None,
     **kwargs: Any,
 ) -> List[RunEvaluator]:
     """Load evaluators specified by a list of evaluator types.
@@ -50,20 +98,18 @@ def load_run_evaluators_for_model(
         List[RunEvaluator]
             The loaded Run evaluators.
     """
-    evaluators_ = load_evaluators(evaluators, llm=eval_llm, **kwargs)
     run_evaluators = []
-    for evaluator in evaluators_:
-        if isinstance(evaluator, StringEvaluator):
-            run_evaluator = StringRunEvaluatorChain.from_model_and_evaluator(
-                model,
-                evaluator,
-                input_key=input_key,
-                prediction_key=prediction_key,
-                reference_key=reference_key,
-            )
-        else:
-            raise NotImplementedError(
-                f"Run evaluator for {evaluator} is not implemented"
-            )
-        run_evaluators.append(run_evaluator)
+    for evaluator in evaluators:
+        _kwargs = config.get(evaluator, {}) if config else {}
+        run_evaluators.append(
+            load_run_evaluator_for_model(
+                evaluator,
+                model,
+                input_key=input_key,
+                prediction_key=prediction_key,
+                reference_key=reference_key,
+                eval_llm=eval_llm,
+                **{**kwargs, **_kwargs},
+            )
+        )
     return run_evaluators
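
The new config parameter lets callers layer evaluator-specific kwargs on top
of the shared **kwargs, keyed by evaluator type. A sketch (the criteria value
is illustrative):

    from langchain.chat_models import ChatOpenAI
    from langchain.evaluation.run_evaluators import load_run_evaluators_for_model
    from langchain.evaluation.schema import EvaluatorType

    llm = ChatOpenAI(temperature=0)

    run_evaluators = load_run_evaluators_for_model(
        [EvaluatorType.QA, EvaluatorType.CRITERIA],
        llm,
        reference_key="answer",
        # Applied only to the criteria evaluator, merged over shared kwargs.
        config={EvaluatorType.CRITERIA: {"criteria": "conciseness"}},
    )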

langchain/evaluation/string_distance/base.py

@@ -141,6 +141,10 @@ class StringDistanceEvalChain(_RapidFuzzChainMixin, StringEvaluator):
         """
         return ["reference", "prediction"]
 
+    @property
+    def evaluation_name(self) -> str:
+        return f"{self.distance.value}_distance"
+
     @staticmethod
     def _get_metric(distance: str) -> Callable:
         """
@@ -275,6 +279,10 @@ class PairwiseStringDistanceEvalChain(_RapidFuzzChainMixin, PairwiseStringEvaluator):
         """
         return ["prediction", "prediction_b"]
 
+    @property
+    def evaluation_name(self) -> str:
+        return f"pairwise_{self.distance.value}_distance"
+
     def _call(
         self,
         inputs: Dict[str, Any],
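
And a sketch of what the new names resolve to on the string-distance chains
(enum values assumed from the StringDistance enum in the same module; these
chains need the rapidfuzz package installed):

    from langchain.evaluation.string_distance.base import (
        PairwiseStringDistanceEvalChain,
        StringDistance,
        StringDistanceEvalChain,
    )

    chain = StringDistanceEvalChain(distance=StringDistance.JARO)
    print(chain.evaluation_name)  # jaro_distance

    pairwise = PairwiseStringDistanceEvalChain(distance=StringDistance.JARO)
    print(pairwise.evaluation_name)  # pairwise_jaro_distance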
