|
|
@ -237,18 +237,18 @@ async def _gather_with_concurrency(
|
|
|
|
return results
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _tracer_initializer(session_name: Optional[str]) -> Optional[LangChainTracer]:
|
|
|
|
async def _tracer_initializer(project_name: Optional[str]) -> Optional[LangChainTracer]:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Initialize a tracer to share across tasks.
|
|
|
|
Initialize a tracer to share across tasks.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
session_name: The session name for the tracer.
|
|
|
|
project_name: The project name for the tracer.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
A LangChainTracer instance with an active session.
|
|
|
|
A LangChainTracer instance with an active project.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
if session_name:
|
|
|
|
if project_name:
|
|
|
|
tracer = LangChainTracer(session_name=session_name)
|
|
|
|
tracer = LangChainTracer(project_name=project_name)
|
|
|
|
return tracer
|
|
|
|
return tracer
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
return None
|
|
|
|
return None
|
|
|
@ -260,12 +260,12 @@ async def arun_on_examples(
|
|
|
|
*,
|
|
|
|
*,
|
|
|
|
concurrency_level: int = 5,
|
|
|
|
concurrency_level: int = 5,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
session_name: Optional[str] = None,
|
|
|
|
project_name: Optional[str] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
verbose: bool = False,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Run the chain on examples and store traces to the specified session name.
|
|
|
|
Run the chain on examples and store traces to the specified project name.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
examples: Examples to run the model or chain over
|
|
|
|
examples: Examples to run the model or chain over
|
|
|
@ -276,7 +276,7 @@ async def arun_on_examples(
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
intervals.
|
|
|
|
intervals.
|
|
|
|
session_name: Session name to use when tracing runs.
|
|
|
|
project_name: Project name to use when tracing runs.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
tags: Tags to add to the traces.
|
|
|
|
tags: Tags to add to the traces.
|
|
|
|
|
|
|
|
|
|
|
@ -307,7 +307,7 @@ async def arun_on_examples(
|
|
|
|
|
|
|
|
|
|
|
|
await _gather_with_concurrency(
|
|
|
|
await _gather_with_concurrency(
|
|
|
|
concurrency_level,
|
|
|
|
concurrency_level,
|
|
|
|
functools.partial(_tracer_initializer, session_name),
|
|
|
|
functools.partial(_tracer_initializer, project_name),
|
|
|
|
*(functools.partial(process_example, e) for e in examples),
|
|
|
|
*(functools.partial(process_example, e) for e in examples),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
return results
|
|
|
|
return results
|
|
|
@ -386,11 +386,11 @@ def run_on_examples(
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
*,
|
|
|
|
*,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
session_name: Optional[str] = None,
|
|
|
|
project_name: Optional[str] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
verbose: bool = False,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
"""Run the chain on examples and store traces to the specified session name.
|
|
|
|
"""Run the chain on examples and store traces to the specified project name.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
examples: Examples to run model or chain over.
|
|
|
|
examples: Examples to run model or chain over.
|
|
|
@ -401,14 +401,14 @@ def run_on_examples(
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
intervals.
|
|
|
|
intervals.
|
|
|
|
session_name: Session name to use when tracing runs.
|
|
|
|
project_name: Project name to use when tracing runs.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
tags: Tags to add to the run traces.
|
|
|
|
tags: Tags to add to the run traces.
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
A dictionary mapping example ids to the model outputs.
|
|
|
|
A dictionary mapping example ids to the model outputs.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
results: Dict[str, Any] = {}
|
|
|
|
results: Dict[str, Any] = {}
|
|
|
|
tracer = LangChainTracer(session_name=session_name) if session_name else None
|
|
|
|
tracer = LangChainTracer(project_name=project_name) if project_name else None
|
|
|
|
for i, example in enumerate(examples):
|
|
|
|
for i, example in enumerate(examples):
|
|
|
|
result = run_llm_or_chain(
|
|
|
|
result = run_llm_or_chain(
|
|
|
|
example,
|
|
|
|
example,
|
|
|
@ -425,13 +425,13 @@ def run_on_examples(
|
|
|
|
return results
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_session_name(
|
|
|
|
def _get_project_name(
|
|
|
|
session_name: Optional[str],
|
|
|
|
project_name: Optional[str],
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
dataset_name: str,
|
|
|
|
dataset_name: str,
|
|
|
|
) -> str:
|
|
|
|
) -> str:
|
|
|
|
if session_name is not None:
|
|
|
|
if project_name is not None:
|
|
|
|
return session_name
|
|
|
|
return project_name
|
|
|
|
current_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
|
|
current_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
|
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
|
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
|
|
model_name = llm_or_chain_factory.__class__.__name__
|
|
|
|
model_name = llm_or_chain_factory.__class__.__name__
|
|
|
@ -446,13 +446,13 @@ async def arun_on_dataset(
|
|
|
|
*,
|
|
|
|
*,
|
|
|
|
concurrency_level: int = 5,
|
|
|
|
concurrency_level: int = 5,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
session_name: Optional[str] = None,
|
|
|
|
project_name: Optional[str] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
verbose: bool = False,
|
|
|
|
client: Optional[LangChainPlusClient] = None,
|
|
|
|
client: Optional[LangChainPlusClient] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Run the chain on a dataset and store traces to the specified session name.
|
|
|
|
Run the chain on a dataset and store traces to the specified project name.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
client: Client to use to read the dataset.
|
|
|
|
client: Client to use to read the dataset.
|
|
|
@ -464,7 +464,7 @@ async def arun_on_dataset(
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
intervals.
|
|
|
|
intervals.
|
|
|
|
session_name: Name of the session to store the traces in.
|
|
|
|
project_name: Name of the project to store the traces in.
|
|
|
|
Defaults to {dataset_name}-{chain class name}-{datetime}.
|
|
|
|
Defaults to {dataset_name}-{chain class name}-{datetime}.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
client: Client to use to read the dataset. If not provided, a new
|
|
|
|
client: Client to use to read the dataset. If not provided, a new
|
|
|
@ -472,11 +472,10 @@ async def arun_on_dataset(
|
|
|
|
tags: Tags to add to each run in the session.
|
|
|
|
tags: Tags to add to each run in the session.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
A dictionary containing the run's session name and the resulting model outputs.
|
|
|
|
A dictionary containing the run's project name and the resulting model outputs.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
client_ = client or LangChainPlusClient()
|
|
|
|
client_ = client or LangChainPlusClient()
|
|
|
|
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
|
|
|
|
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
|
|
|
|
client_.create_session(session_name, mode="eval")
|
|
|
|
|
|
|
|
dataset = client_.read_dataset(dataset_name=dataset_name)
|
|
|
|
dataset = client_.read_dataset(dataset_name=dataset_name)
|
|
|
|
examples = client_.list_examples(dataset_id=str(dataset.id))
|
|
|
|
examples = client_.list_examples(dataset_id=str(dataset.id))
|
|
|
|
|
|
|
|
|
|
|
@ -485,12 +484,12 @@ async def arun_on_dataset(
|
|
|
|
llm_or_chain_factory,
|
|
|
|
llm_or_chain_factory,
|
|
|
|
concurrency_level=concurrency_level,
|
|
|
|
concurrency_level=concurrency_level,
|
|
|
|
num_repetitions=num_repetitions,
|
|
|
|
num_repetitions=num_repetitions,
|
|
|
|
session_name=session_name,
|
|
|
|
project_name=project_name,
|
|
|
|
verbose=verbose,
|
|
|
|
verbose=verbose,
|
|
|
|
tags=tags,
|
|
|
|
tags=tags,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
"session_name": session_name,
|
|
|
|
"project_name": project_name,
|
|
|
|
"results": results,
|
|
|
|
"results": results,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -500,12 +499,12 @@ def run_on_dataset(
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
|
|
*,
|
|
|
|
*,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
num_repetitions: int = 1,
|
|
|
|
session_name: Optional[str] = None,
|
|
|
|
project_name: Optional[str] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
verbose: bool = False,
|
|
|
|
client: Optional[LangChainPlusClient] = None,
|
|
|
|
client: Optional[LangChainPlusClient] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
tags: Optional[List[str]] = None,
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
"""Run the chain on a dataset and store traces to the specified session name.
|
|
|
|
"""Run the chain on a dataset and store traces to the specified project name.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
dataset_name: Name of the dataset to run the chain on.
|
|
|
|
dataset_name: Name of the dataset to run the chain on.
|
|
|
@ -516,7 +515,7 @@ def run_on_dataset(
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
num_repetitions: Number of times to run the model on each example.
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
This is useful when testing success rates or generating confidence
|
|
|
|
intervals.
|
|
|
|
intervals.
|
|
|
|
session_name: Name of the session to store the traces in.
|
|
|
|
project_name: Name of the project to store the traces in.
|
|
|
|
Defaults to {dataset_name}-{chain class name}-{datetime}.
|
|
|
|
Defaults to {dataset_name}-{chain class name}-{datetime}.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
verbose: Whether to print progress.
|
|
|
|
client: Client to use to access the dataset. If None, a new client
|
|
|
|
client: Client to use to access the dataset. If None, a new client
|
|
|
@ -524,22 +523,21 @@ def run_on_dataset(
|
|
|
|
tags: Tags to add to each run in the session.
|
|
|
|
tags: Tags to add to each run in the session.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
A dictionary containing the run's session name and the resulting model outputs.
|
|
|
|
A dictionary containing the run's project name and the resulting model outputs.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
client_ = client or LangChainPlusClient()
|
|
|
|
client_ = client or LangChainPlusClient()
|
|
|
|
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
|
|
|
|
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
|
|
|
|
client_.create_session(session_name, mode="eval")
|
|
|
|
|
|
|
|
dataset = client_.read_dataset(dataset_name=dataset_name)
|
|
|
|
dataset = client_.read_dataset(dataset_name=dataset_name)
|
|
|
|
examples = client_.list_examples(dataset_id=str(dataset.id))
|
|
|
|
examples = client_.list_examples(dataset_id=str(dataset.id))
|
|
|
|
results = run_on_examples(
|
|
|
|
results = run_on_examples(
|
|
|
|
examples,
|
|
|
|
examples,
|
|
|
|
llm_or_chain_factory,
|
|
|
|
llm_or_chain_factory,
|
|
|
|
num_repetitions=num_repetitions,
|
|
|
|
num_repetitions=num_repetitions,
|
|
|
|
session_name=session_name,
|
|
|
|
project_name=project_name,
|
|
|
|
verbose=verbose,
|
|
|
|
verbose=verbose,
|
|
|
|
tags=tags,
|
|
|
|
tags=tags,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
"session_name": session_name,
|
|
|
|
"project_name": project_name,
|
|
|
|
"results": results,
|
|
|
|
"results": results,
|
|
|
|
}
|
|
|
|
}
|
|
|
|