Add Agent Trajectory Interface (#7122)

pull/6942/head^2
William FH 1 year ago committed by GitHub
parent a6b39afe0e
commit 1f4a51cb9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -21,7 +21,11 @@ from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChai
from langchain.evaluation.comparison import PairwiseStringEvalChain
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
from langchain.evaluation.schema import (
AgentTrajectoryEvaluator,
PairwiseStringEvaluator,
StringEvaluator,
)
__all__ = [
"PairwiseStringEvalChain",
@ -32,4 +36,5 @@ __all__ = [
"PairwiseStringEvaluator",
"TrajectoryEvalChain",
"CriteriaEvalChain",
"AgentTrajectoryEvaluator",
]

@ -22,6 +22,7 @@ from langchain.evaluation.agents.trajectory_eval_prompt import (
EVAL_CHAT_PROMPT,
TOOL_FREE_EVAL_CHAT_PROMPT,
)
from langchain.evaluation.schema import AgentTrajectoryEvaluator
from langchain.schema import AgentAction, BaseOutputParser, OutputParserException
from langchain.tools.base import BaseTool
@ -70,7 +71,7 @@ class TrajectoryOutputParser(BaseOutputParser):
return TrajectoryEval(score=int(score_str), reasoning=reasoning)
class TrajectoryEvalChain(Chain):
class TrajectoryEvalChain(AgentTrajectoryEvaluator, Chain):
"""A chain for evaluating ReAct style agents.
This chain is used to evaluate ReAct style agents by reasoning about
@ -142,7 +143,9 @@ Description: {tool.description}"""
)
@staticmethod
def get_agent_trajectory(steps: Union[str, List[Tuple[AgentAction, str]]]) -> str:
def get_agent_trajectory(
steps: Union[str, Sequence[Tuple[AgentAction, str]]]
) -> str:
"""Get the agent trajectory as a formatted string.
Args:
@ -308,12 +311,12 @@ The following is the expected answer. Use this to measure correctness:
return {"score": parsed_output.score}
def evaluate_agent_trajectory(
def _evaluate_agent_trajectory(
self,
*,
prediction: str,
input: str,
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
agent_trajectory: Sequence[Tuple[AgentAction, str]],
reference: Optional[str] = None,
callbacks: Callbacks = None,
**kwargs: Any,
@ -321,11 +324,12 @@ The following is the expected answer. Use this to measure correctness:
"""Evaluate a trajectory.
Args:
input (str): The input question.
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
prediction (str): The final predicted response.
input (str): The input to the agent.
agent_trajectory (List[Tuple[AgentAction, str]]):
The intermediate steps forming the agent trajectory.
prediction (str): The expected prediction.
reference (Optional[str]): The reference answer.
callbacks (Callbacks): Callbacks to use for this chain run.
Returns:
dict: The evaluation result.
@ -338,12 +342,12 @@ The following is the expected answer. Use this to measure correctness:
}
return self(inputs=inputs, callbacks=callbacks, **kwargs)
async def aevaluate_agent_trajectory(
async def _aevaluate_agent_trajectory(
self,
*,
prediction: str,
input: str,
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
agent_trajectory: Sequence[Tuple[AgentAction, str]],
reference: Optional[str] = None,
callbacks: Callbacks = None,
**kwargs: Any,
@ -351,11 +355,12 @@ The following is the expected answer. Use this to measure correctness:
"""Asynchronously evaluate a trajectory.
Args:
input (str): The input question.
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
prediction (str): The final predicted response.
input (str): The input to the agent.
agent_trajectory (List[Tuple[AgentAction, str]]):
The intermediate steps forming the agent trajectory.
prediction (str): The expected prediction.
reference (Optional[str]): The reference answer.
callbacks (Callbacks): Callbacks to use for this chain run.
Returns:
dict: The evaluation result.

@ -3,9 +3,11 @@ from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import Any, Optional
from typing import Any, Optional, Sequence, Tuple
from warnings import warn
from langchain.schema.agent import AgentAction
logger = logging.getLogger(__name__)
@ -275,3 +277,120 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
input=input,
**kwargs,
)
class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
    """Interface for evaluating agent trajectories.

    Subclasses must implement ``_evaluate_agent_trajectory`` and may
    override ``_aevaluate_agent_trajectory`` to add async support. Callers
    should use the public ``evaluate_agent_trajectory`` /
    ``aevaluate_agent_trajectory`` wrappers, which run
    ``_check_evaluation_args`` (provided by ``_EvalArgsMixin``) before
    delegating to the private hooks.
    """

    @property
    def requires_input(self) -> bool:
        """Whether this evaluator requires an input; always ``True`` here."""
        return True

    @abstractmethod
    def _evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional implementation-specific arguments.

        Returns:
            dict: The evaluation result.
        """

    async def _aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory.

        The base implementation always raises; subclasses that support
        async evaluation override this hook.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional implementation-specific arguments.

        Returns:
            dict: The evaluation result.

        Raises:
            NotImplementedError: If the subclass has not overridden this
                method.
        """
        # Name the private hook: the public ``aevaluate_agent_trajectory``
        # wrapper is already implemented on this base class, so pointing at
        # it would send implementers to the wrong method.
        raise NotImplementedError(
            f"{self.__class__.__name__} hasn't implemented an async "
            "_aevaluate_agent_trajectory method."
        )

    def evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Forwarded unchanged to
                ``_evaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        # Check the reference/input arguments before doing any work.
        self._check_evaluation_args(reference=reference, input=input)
        return self._evaluate_agent_trajectory(
            prediction=prediction,
            input=input,
            agent_trajectory=agent_trajectory,
            reference=reference,
            **kwargs,
        )

    async def aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Forwarded unchanged to
                ``_aevaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        # Same argument check as the synchronous wrapper.
        self._check_evaluation_args(reference=reference, input=input)
        return await self._aevaluate_agent_trajectory(
            prediction=prediction,
            input=input,
            agent_trajectory=agent_trajectory,
            reference=reference,
            **kwargs,
        )

Loading…
Cancel
Save