mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Add Agent Trajectory Interface (#7122)
This commit is contained in:
parent
a6b39afe0e
commit
1f4a51cb9c
@ -21,7 +21,11 @@ from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChai
|
||||
from langchain.evaluation.comparison import PairwiseStringEvalChain
|
||||
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
|
||||
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
|
||||
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
|
||||
from langchain.evaluation.schema import (
|
||||
AgentTrajectoryEvaluator,
|
||||
PairwiseStringEvaluator,
|
||||
StringEvaluator,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"PairwiseStringEvalChain",
|
||||
@ -32,4 +36,5 @@ __all__ = [
|
||||
"PairwiseStringEvaluator",
|
||||
"TrajectoryEvalChain",
|
||||
"CriteriaEvalChain",
|
||||
"AgentTrajectoryEvaluator",
|
||||
]
|
||||
|
@ -22,6 +22,7 @@ from langchain.evaluation.agents.trajectory_eval_prompt import (
|
||||
EVAL_CHAT_PROMPT,
|
||||
TOOL_FREE_EVAL_CHAT_PROMPT,
|
||||
)
|
||||
from langchain.evaluation.schema import AgentTrajectoryEvaluator
|
||||
from langchain.schema import AgentAction, BaseOutputParser, OutputParserException
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
@ -70,7 +71,7 @@ class TrajectoryOutputParser(BaseOutputParser):
|
||||
return TrajectoryEval(score=int(score_str), reasoning=reasoning)
|
||||
|
||||
|
||||
class TrajectoryEvalChain(Chain):
|
||||
class TrajectoryEvalChain(AgentTrajectoryEvaluator, Chain):
|
||||
"""A chain for evaluating ReAct style agents.
|
||||
|
||||
This chain is used to evaluate ReAct style agents by reasoning about
|
||||
@ -142,7 +143,9 @@ Description: {tool.description}"""
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_agent_trajectory(steps: Union[str, List[Tuple[AgentAction, str]]]) -> str:
|
||||
def get_agent_trajectory(
|
||||
steps: Union[str, Sequence[Tuple[AgentAction, str]]]
|
||||
) -> str:
|
||||
"""Get the agent trajectory as a formatted string.
|
||||
|
||||
Args:
|
||||
@ -308,12 +311,12 @@ The following is the expected answer. Use this to measure correctness:
|
||||
|
||||
return {"score": parsed_output.score}
|
||||
|
||||
def evaluate_agent_trajectory(
|
||||
def _evaluate_agent_trajectory(
|
||||
self,
|
||||
*,
|
||||
prediction: str,
|
||||
input: str,
|
||||
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
|
||||
agent_trajectory: Sequence[Tuple[AgentAction, str]],
|
||||
reference: Optional[str] = None,
|
||||
callbacks: Callbacks = None,
|
||||
**kwargs: Any,
|
||||
@ -321,11 +324,12 @@ The following is the expected answer. Use this to measure correctness:
|
||||
"""Evaluate a trajectory.
|
||||
|
||||
Args:
|
||||
input (str): The input question.
|
||||
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
|
||||
prediction (str): The final predicted response.
|
||||
input (str): The input to the agent.
|
||||
agent_trajectory (List[Tuple[AgentAction, str]]):
|
||||
The intermediate steps forming the agent trajectory.
|
||||
prediction (str): The expected prediction.
|
||||
reference (Optional[str]): The reference answer.
|
||||
callbacks (Callbacks): Callbacks to use for this chain run.
|
||||
|
||||
Returns:
|
||||
dict: The evaluation result.
|
||||
@ -338,12 +342,12 @@ The following is the expected answer. Use this to measure correctness:
|
||||
}
|
||||
return self(inputs=inputs, callbacks=callbacks, **kwargs)
|
||||
|
||||
async def aevaluate_agent_trajectory(
|
||||
async def _aevaluate_agent_trajectory(
|
||||
self,
|
||||
*,
|
||||
prediction: str,
|
||||
input: str,
|
||||
agent_trajectory: Union[str, List[Tuple[AgentAction, str]]],
|
||||
agent_trajectory: Sequence[Tuple[AgentAction, str]],
|
||||
reference: Optional[str] = None,
|
||||
callbacks: Callbacks = None,
|
||||
**kwargs: Any,
|
||||
@ -351,11 +355,12 @@ The following is the expected answer. Use this to measure correctness:
|
||||
"""Asynchronously evaluate a trajectory.
|
||||
|
||||
Args:
|
||||
input (str): The input question.
|
||||
agent_trajectory (Union[str, List[Tuple[AgentAction, str]]]):
|
||||
prediction (str): The final predicted response.
|
||||
input (str): The input to the agent.
|
||||
agent_trajectory (List[Tuple[AgentAction, str]]):
|
||||
The intermediate steps forming the agent trajectory.
|
||||
prediction (str): The expected prediction.
|
||||
reference (Optional[str]): The reference answer.
|
||||
callbacks (Callbacks): Callbacks to use for this chain run.
|
||||
|
||||
Returns:
|
||||
dict: The evaluation result.
|
||||
|
@ -3,9 +3,11 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Optional
|
||||
from typing import Any, Optional, Sequence, Tuple
|
||||
from warnings import warn
|
||||
|
||||
from langchain.schema.agent import AgentAction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -275,3 +277,120 @@ class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
|
||||
input=input,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
class AgentTrajectoryEvaluator(_EvalArgsMixin, ABC):
    """Abstract interface for evaluators that judge an agent's trajectory.

    Subclasses must implement ``_evaluate_agent_trajectory`` and may override
    ``_aevaluate_agent_trajectory``. Callers use the public
    ``evaluate_agent_trajectory`` / ``aevaluate_agent_trajectory`` methods,
    which call ``_check_evaluation_args`` before delegating to those hooks.
    """

    @property
    def requires_input(self) -> bool:
        """Report that this evaluator requires an input; always ``True``."""
        return True

    @abstractmethod
    def _evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory (hook to be implemented by subclasses).

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional keyword arguments forwarded by the
                public wrapper.

        Returns:
            dict: The evaluation result.
        """

    async def _aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory (optional subclass hook).

        The default implementation does no evaluation and always raises.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Additional keyword arguments forwarded by the
                public wrapper.

        Returns:
            dict: The evaluation result.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this
                method.
        """
        evaluator_name = self.__class__.__name__
        raise NotImplementedError(
            f"{evaluator_name} hasn't implemented an async "
            "aevaluate_agent_trajectory method."
        )

    def evaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a trajectory.

        Calls ``_check_evaluation_args`` with the reference and input, then
        delegates to ``_evaluate_agent_trajectory``.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Extra keyword arguments passed through unchanged
                to ``_evaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        self._check_evaluation_args(reference=reference, input=input)
        result = self._evaluate_agent_trajectory(
            prediction=prediction,
            agent_trajectory=agent_trajectory,
            input=input,
            reference=reference,
            **kwargs,
        )
        return result

    async def aevaluate_agent_trajectory(
        self,
        *,
        prediction: str,
        agent_trajectory: Sequence[Tuple[AgentAction, str]],
        input: str,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a trajectory.

        Calls ``_check_evaluation_args`` with the reference and input, then
        awaits ``_aevaluate_agent_trajectory``.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (Sequence[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.
            **kwargs (Any): Extra keyword arguments passed through unchanged
                to ``_aevaluate_agent_trajectory``.

        Returns:
            dict: The evaluation result.
        """
        self._check_evaluation_args(reference=reference, input=input)
        result = await self._aevaluate_agent_trajectory(
            prediction=prediction,
            agent_trajectory=agent_trajectory,
            input=input,
            reference=reference,
            **kwargs,
        )
        return result
|
||||
|
Loading…
Reference in New Issue
Block a user