mirror of
https://github.com/hwchase17/langchain
synced 2024-10-31 15:20:26 +00:00
c6f2d27789
Add links to reference docs
142 lines
4.6 KiB
Plaintext
142 lines
4.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "db9d627f-b234-4f7f-ab96-639fae474122",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Custom Trajectory Evaluator\n",
|
|
"\n",
|
|
"You can make your own custom trajectory evaluators by inheriting from the [AgentTrajectoryEvaluator](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.AgentTrajectoryEvaluator.html#langchain.evaluation.schema.AgentTrajectoryEvaluator) class and overwriting the `_evaluate_agent_trajectory` (and `_aevaluate_agent_action`) method.\n",
|
|
"\n",
|
|
"\n",
|
|
"In this example, you will make a simple trajectory evaluator that uses an LLM to determine if any actions were unnecessary."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "ca84ab0c-e7e2-4c03-bd74-9cc4e6338eec",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from typing import Any, Optional, Sequence, Tuple\n",
|
|
"from langchain.chat_models import ChatOpenAI\n",
|
|
"from langchain.chains import LLMChain\n",
|
|
"from langchain.schema import AgentAction\n",
|
|
"from langchain.evaluation import AgentTrajectoryEvaluator\n",
|
|
"\n",
|
|
"\n",
|
|
"class StepNecessityEvaluator(AgentTrajectoryEvaluator):\n",
|
|
" \"\"\"Evaluate the perplexity of a predicted string.\"\"\"\n",
|
|
"\n",
|
|
" def __init__(self) -> None:\n",
|
|
" llm = ChatOpenAI(model=\"gpt-4\", temperature=0.0)\n",
|
|
" template = \"\"\"Are any of the following steps unnecessary in answering {input}? Provide the verdict on a new line as a single \"Y\" for yes or \"N\" for no.\n",
|
|
"\n",
|
|
" DATA\n",
|
|
" ------\n",
|
|
" Steps: {trajectory}\n",
|
|
" ------\n",
|
|
"\n",
|
|
" Verdict:\"\"\"\n",
|
|
" self.chain = LLMChain.from_string(llm, template)\n",
|
|
"\n",
|
|
" def _evaluate_agent_trajectory(\n",
|
|
" self,\n",
|
|
" *,\n",
|
|
" prediction: str,\n",
|
|
" input: str,\n",
|
|
" agent_trajectory: Sequence[Tuple[AgentAction, str]],\n",
|
|
" reference: Optional[str] = None,\n",
|
|
" **kwargs: Any,\n",
|
|
" ) -> dict:\n",
|
|
" vals = [\n",
|
|
" f\"{i}: Action=[{action.tool}] returned observation = [{observation}]\"\n",
|
|
" for i, (action, observation) in enumerate(agent_trajectory)\n",
|
|
" ]\n",
|
|
" trajectory = \"\\n\".join(vals)\n",
|
|
" response = self.chain.run(dict(trajectory=trajectory, input=input), **kwargs)\n",
|
|
" decision = response.split(\"\\n\")[-1].strip()\n",
|
|
" score = 1 if decision == \"Y\" else 0\n",
|
|
" return {\"score\": score, \"value\": decision, \"reasoning\": response}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "297dea4b-fb28-4292-b6e0-1c769cfb9cbd",
|
|
"metadata": {},
|
|
"source": [
|
|
"The example above will return a score of 1 if the language model predicts that any of the actions were unnecessary, and it returns a score of 0 if all of them were predicted to be necessary.\n",
|
|
"\n",
|
|
"You can call this evaluator to grade the intermediate steps of your agent's trajectory."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "a3fbcc1d-249f-4e00-8841-b6872c73c486",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'score': 1, 'value': 'Y', 'reasoning': 'Y'}"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"evaluator = StepNecessityEvaluator()\n",
|
|
"\n",
|
|
"evaluator.evaluate_agent_trajectory(\n",
|
|
" prediction=\"The answer is pi\",\n",
|
|
" input=\"What is today?\",\n",
|
|
" agent_trajectory=[\n",
|
|
" (\n",
|
|
" AgentAction(tool=\"ask\", tool_input=\"What is today?\", log=\"\"),\n",
|
|
" \"tomorrow's yesterday\",\n",
|
|
" ),\n",
|
|
" (\n",
|
|
" AgentAction(tool=\"check_tv\", tool_input=\"Watch tv for half hour\", log=\"\"),\n",
|
|
" \"bzzz\",\n",
|
|
" ),\n",
|
|
" ],\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "77353528-723e-4075-939e-aebdb17c1e4f",
|
|
"metadata": {},
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|