langchain/tests/unit_tests/evaluation/run_evaluators/test_implementations.py
Zander Chase c460b04c64
Update String Evaluator (#6615)
- Add protocol for `evaluate_strings` 
- Move the criteria evaluator out so it's not restricted to being
applied on traced runs
2023-06-26 14:16:14 -07:00

55 lines
1.8 KiB
Python

"""Test run evaluator implementations basic functionality."""
from uuid import UUID
import pytest
from langchainplus_sdk.schemas import Example, Run
from langchain.evaluation.run_evaluators import get_criteria_evaluator, get_qa_evaluator
from tests.unit_tests.llms.fake_llm import FakeLLM
@pytest.fixture
def run() -> Run:
    """Provide a fixed sample ``Run`` for the evaluator tests.

    The run is a ``chain``-type run named "My Run" with a single question
    as input and "The answer is 42." as output.
    """
    question = "What is the answer to life, the universe, and everything?"
    # Start and end deliberately share the same timestamp string.
    timestamp = "2021-07-20T15:00:00.000000+00:00"
    run_fields = {
        "id": UUID("f77cd087-48f7-4c62-9e0e-297842202107"),
        "name": "My Run",
        "inputs": {"input": question},
        "outputs": {"output": "The answer is 42."},
        "start_time": timestamp,
        "end_time": timestamp,
        "run_type": "chain",
        "execution_order": 1,
    }
    return Run(**run_fields)
@pytest.fixture
def example() -> Example:
    """Provide a fixed sample dataset ``Example`` for the evaluator tests.

    Its input question and output answer are the same strings used by the
    ``run`` fixture in this module.
    """
    example_id = UUID("f77cd087-48f7-4c62-9e0e-297842202106")
    dataset_id = UUID("f77cd087-48f7-4c62-9e0e-297842202105")
    question = "What is the answer to life, the universe, and everything?"
    reference_answer = "The answer is 42."
    return Example(
        id=example_id,
        dataset_id=dataset_id,
        inputs={"input": question},
        outputs={"output": reference_answer},
        created_at="2021-07-20T15:00:00.000000+00:00",
    )
def test_get_qa_evaluator(run: Run, example: Example) -> None:
    """Check the QA evaluator's result for a canned "CORRECT" LLM reply.

    A ``FakeLLM`` is configured with one scripted response whose last line
    is ``CORRECT``; the evaluation result is expected to carry that value
    and a score of 1.
    """
    canned_reply = "This checks out.\nCORRECT"
    fake_llm = FakeLLM(queries={"a": canned_reply}, sequential_responses=True)

    evaluator = get_qa_evaluator(fake_llm)
    feedback = evaluator.evaluate_run(run, example)

    assert feedback.value == "CORRECT"
    assert feedback.score == 1
def test_get_criteria_evaluator(run: Run, example: Example) -> None:
    """Check the "conciseness" criteria evaluator's result for a canned "Y" reply.

    A ``FakeLLM`` is configured with one scripted response whose last line
    is ``Y``; the evaluation result is expected to carry that value and a
    score of 1.
    """
    canned_reply = "This checks out.\nY"
    fake_llm = FakeLLM(queries={"a": canned_reply}, sequential_responses=True)

    evaluator = get_criteria_evaluator(fake_llm, criteria="conciseness")
    feedback = evaluator.evaluate_run(run, example)

    assert feedback.value == "Y"
    assert feedback.score == 1