Enhanced pairwise error (#11131)

pull/11201/head
William FH 1 year ago committed by GitHub
parent b7e9db5e73
commit d3c2ca5656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -16,6 +16,10 @@ Here's a summary of the key methods and properties of a comparison evaluator:
- `requires_input`: This property indicates whether this evaluator requires an input string.
- `requires_reference`: This property specifies whether this evaluator requires a reference label.
:::note LangSmith Support
The [run_on_dataset](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.smith) evaluation method is designed to evaluate only a single model at a time and thus doesn't support these evaluators.
:::
Detailed information about creating custom evaluators and the available built-in comparison evaluators is provided in the following sections.
import DocCardList from "@theme/DocCardList";

@ -17,7 +17,7 @@
"source": [
"## Installation\n",
"\n",
"To get started with Iris Takeoff, all you need is to have docker and python installed on your local system. If you wish to use the server with gpu suport, then you will need to install docker with cuda support.\n",
"To get started with Iris Takeoff, all you need is to have docker and python installed on your local system. If you wish to use the server with gpu support, then you will need to install docker with cuda support.\n",
"\n",
"For Mac and Windows users, make sure you have the docker daemon running! You can check this by running docker ps in your terminal. To start the daemon, open the docker desktop app.\n",
"\n",
@ -157,7 +157,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"\n",
"llm = TitanTakeoff()\n",
"\n",

@ -28,7 +28,11 @@ from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler
from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
from langchain.chains.base import Chain
from langchain.evaluation.loading import load_evaluator
from langchain.evaluation.schema import EvaluatorType, StringEvaluator
from langchain.evaluation.schema import (
EvaluatorType,
PairwiseStringEvaluator,
StringEvaluator,
)
from langchain.schema import ChatResult, LLMResult
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import BaseMessage, messages_from_dict
@ -486,6 +490,15 @@ def _construct_run_evaluator(
reference_key=reference_key,
tags=[eval_type_tag],
)
elif isinstance(evaluator_, PairwiseStringEvaluator):
raise NotImplementedError(
f"Run evaluator for {eval_type_tag} is not implemented."
" PairwiseStringEvaluators compare the outputs of two different models"
" rather than the output of a single model."
" Did you mean to use a StringEvaluator instead?"
"\nSee: https://python.langchain.com/docs/guides/evaluation/string/"
)
else:
raise NotImplementedError(
f"Run evaluator for {eval_type_tag} is not implemented"

Loading…
Cancel
Save