diff --git a/libs/langchain/langchain/smith/evaluation/runner_utils.py b/libs/langchain/langchain/smith/evaluation/runner_utils.py index e6dfe827f6..0e2262875f 100644 --- a/libs/langchain/langchain/smith/evaluation/runner_utils.py +++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py @@ -82,6 +82,8 @@ class TestResult(dict): _quantiles = df[feedback_cols].quantile( quantiles or [0.25, 0.5, 0.75], numeric_only=True ) + _quantiles.loc["mean"] = df[feedback_cols].mean() + _quantiles.loc["mode"] = df[feedback_cols].mode().iloc[0] return _quantiles.transpose() def to_dataframe(self) -> pd.DataFrame: diff --git a/libs/langchain/langchain/utils/utils.py b/libs/langchain/langchain/utils/utils.py index 77ccbf6891..26533514a6 100644 --- a/libs/langchain/langchain/utils/utils.py +++ b/libs/langchain/langchain/utils/utils.py @@ -1,6 +1,7 @@ """Generic utility functions.""" import contextlib import datetime +import functools import importlib import warnings from importlib.metadata import version @@ -14,7 +15,8 @@ def xor_args(*arg_groups: Tuple[str, ...]) -> Callable: """Validate specified keyword args are mutually exclusive.""" def decorator(func: Callable) -> Callable: - def wrapper(*args: Any, **kwargs: Any) -> Callable: + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: """Validate exactly one arg in each group is not None.""" counts = [ sum(1 for arg in arg_group if kwargs.get(arg) is not None)