Mirror of https://github.com/hwchase17/langchain, synced 2024-10-29 17:07:25 +00:00.

Commit c460b04c64:
- Add protocol for `evaluate_strings`
- Move the criteria evaluator out so it's not restricted to being applied on traced runs
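The `evaluate_strings` protocol referenced in the commit message is, roughly, a single-method interface for grading one string output, optionally against a reference answer and the original input. A minimal sketch, assuming `StringEvaluator` as the protocol name and `prediction`/`reference`/`input` as the keyword arguments (none of which are confirmed by this page):

from typing import Optional, Protocol


class StringEvaluator(Protocol):
    # Grade a single prediction string; reference and input are optional.
    def evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
    ) -> dict:
        # Expected to return something like {"score": 1, "value": "CORRECT"}.
        ...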
55 lines · 1.8 KiB · Python
"""Test run evaluator implementations basic functionality."""
from uuid import UUID

import pytest
from langchainplus_sdk.schemas import Example, Run

from langchain.evaluation.run_evaluators import (
    get_criteria_evaluator,
    get_qa_evaluator,
)
from tests.unit_tests.llms.fake_llm import FakeLLM


@pytest.fixture
def run() -> Run:
    return Run(
        id=UUID("f77cd087-48f7-4c62-9e0e-297842202107"),
        name="My Run",
        inputs={"input": "What is the answer to life, the universe, and everything?"},
        outputs={"output": "The answer is 42."},
        start_time="2021-07-20T15:00:00.000000+00:00",
        end_time="2021-07-20T15:00:00.000000+00:00",
        run_type="chain",
        execution_order=1,
    )


@pytest.fixture
def example() -> Example:
    return Example(
        id=UUID("f77cd087-48f7-4c62-9e0e-297842202106"),
        dataset_id=UUID("f77cd087-48f7-4c62-9e0e-297842202105"),
        inputs={"input": "What is the answer to life, the universe, and everything?"},
        outputs={"output": "The answer is 42."},
        created_at="2021-07-20T15:00:00.000000+00:00",
    )


def test_get_qa_evaluator(run: Run, example: Example) -> None:
    """Test get_qa_evaluator."""
    # FakeLLM with sequential_responses=True returns the query values in order;
    # the trailing "CORRECT" line is what the QA eval chain parses into the verdict.
    eval_llm = FakeLLM(
        queries={"a": "This checks out.\nCORRECT"}, sequential_responses=True
    )
    qa_evaluator = get_qa_evaluator(eval_llm)
    res = qa_evaluator.evaluate_run(run, example)
    assert res.value == "CORRECT"
    assert res.score == 1


def test_get_criteria_evaluator(run: Run, example: Example) -> None:
    """Test get_criteria_evaluator."""
    # The criteria eval chain parses the trailing "Y"/"N" line into a 1/0 score.
    eval_llm = FakeLLM(queries={"a": "This checks out.\nY"}, sequential_responses=True)
    criteria_evaluator = get_criteria_evaluator(eval_llm, criteria="conciseness")
    res = criteria_evaluator.evaluate_run(run, example)
    assert res.value == "Y"
    assert res.score == 1
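For context, a sketch of how the two factories under test would be wired up outside of a unit test, reusing the `run` and `example` objects built by the fixtures above; `ChatOpenAI` is an assumed stand-in for whatever real eval LLM is used in place of FakeLLM:

# Sketch only: ChatOpenAI is an assumed stand-in for a real eval LLM.
from langchain.chat_models import ChatOpenAI
from langchain.evaluation.run_evaluators import (
    get_criteria_evaluator,
    get_qa_evaluator,
)

eval_llm = ChatOpenAI(temperature=0)

# Grade correctness against the example's reference output.
qa_feedback = get_qa_evaluator(eval_llm).evaluate_run(run, example)

# Grade a reference-free criterion, as in the conciseness test above.
criteria_feedback = get_criteria_evaluator(
    eval_llm, criteria="conciseness"
).evaluate_run(run, example)

print(qa_feedback.value, qa_feedback.score)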