langchain/tests/unit_tests/evaluation/run_evaluators/test_implementations.py

"""Test run evaluator implementations basic functionality."""

from uuid import UUID

import pytest
from langchainplus_sdk.schemas import Example, Run

from langchain.evaluation.run_evaluators import get_criteria_evaluator, get_qa_evaluator
from tests.unit_tests.llms.fake_llm import FakeLLM


@pytest.fixture
def run() -> Run:
    """Build the chain-type ``Run`` that the evaluator tests grade.

    The run carries a single question as its input and a single answer as
    its output; its start and end timestamps are the same instant.
    """
    # Start and end share one value, so spell the timestamp out only once.
    timestamp = "2021-07-20T15:00:00.000000+00:00"
    question = {"input": "What is the answer to life, the universe, and everything?"}
    answer = {"output": "The answer is 42."}
    return Run(
        id=UUID("f77cd087-48f7-4c62-9e0e-297842202107"),
        name="My Run",
        run_type="chain",
        execution_order=1,
        start_time=timestamp,
        end_time=timestamp,
        inputs=question,
        outputs=answer,
    )


@pytest.fixture
def example() -> Example:
    """Build the reference ``Example`` that the run is evaluated against.

    Its inputs and outputs hold the same question/answer strings used by the
    ``run`` fixture in this module.
    """
    example_id = UUID("f77cd087-48f7-4c62-9e0e-297842202106")
    owning_dataset_id = UUID("f77cd087-48f7-4c62-9e0e-297842202105")
    question = {"input": "What is the answer to life, the universe, and everything?"}
    reference_answer = {"output": "The answer is 42."}
    return Example(
        id=example_id,
        dataset_id=owning_dataset_id,
        created_at="2021-07-20T15:00:00.000000+00:00",
        inputs=question,
        outputs=reference_answer,
    )


def test_get_qa_evaluator(run: Run, example: Example) -> None:
    """Check that the QA evaluator reports a CORRECT grade with a score of 1."""
    # The fake LLM is configured with one canned reply ending in the verdict.
    canned_replies = {"a": "This checks out.\nCORRECT"}
    grader_llm = FakeLLM(queries=canned_replies, sequential_responses=True)

    feedback = get_qa_evaluator(grader_llm).evaluate_run(run, example)

    assert feedback.value == "CORRECT"
    assert feedback.score == 1


def test_get_criteria_evaluator(run: Run, example: Example) -> None:
    """Check that the conciseness criteria evaluator reports Y with a score of 1."""
    # The fake LLM is configured with one canned reply ending in the verdict.
    canned_replies = {"a": "This checks out.\nY"}
    grader_llm = FakeLLM(queries=canned_replies, sequential_responses=True)

    evaluator = get_criteria_evaluator(grader_llm, criteria="conciseness")
    feedback = evaluator.evaluate_run(run, example)

    assert feedback.value == "Y"
    assert feedback.score == 1
Update String Evaluator (#6615): add a protocol for `evaluate_strings`, and move the criteria evaluator out so it is not restricted to being applied on traced runs. 2023-06-26 21:16:14 +00:00			`"""Test run evaluator implementations basic functionality."""`

			`from uuid import UUID`

			`import pytest`
			`from langchainplus_sdk.schemas import Example, Run`

			`from langchain.evaluation.run_evaluators import get_criteria_evaluator, get_qa_evaluator`
			`from tests.unit_tests.llms.fake_llm import FakeLLM`


			`@pytest.fixture`
			`def run() -> Run:`
			`return Run(`
			`id=UUID("f77cd087-48f7-4c62-9e0e-297842202107"),`
			`name="My Run",`
			`inputs={"input": "What is the answer to life, the universe, and everything?"},`
			`outputs={"output": "The answer is 42."},`
			`start_time="2021-07-20T15:00:00.000000+00:00",`
			`end_time="2021-07-20T15:00:00.000000+00:00",`
			`run_type="chain",`
			`execution_order=1,`
			`)`


			`@pytest.fixture`
			`def example() -> Example:`
			`return Example(`
			`id=UUID("f77cd087-48f7-4c62-9e0e-297842202106"),`
			`dataset_id=UUID("f77cd087-48f7-4c62-9e0e-297842202105"),`
			`inputs={"input": "What is the answer to life, the universe, and everything?"},`
			`outputs={"output": "The answer is 42."},`
			`created_at="2021-07-20T15:00:00.000000+00:00",`
			`)`


			`def test_get_qa_evaluator(run: Run, example: Example) -> None:`
			`"""Test get_qa_evaluator."""`
			`eval_llm = FakeLLM(`
			`queries={"a": "This checks out.\nCORRECT"}, sequential_responses=True`
			`)`
			`qa_evaluator = get_qa_evaluator(eval_llm)`
			`res = qa_evaluator.evaluate_run(run, example)`
			`assert res.value == "CORRECT"`
			`assert res.score == 1`


			`def test_get_criteria_evaluator(run: Run, example: Example) -> None:`
			`"""Get a criteria evaluator."""`
			`eval_llm = FakeLLM(queries={"a": "This checks out.\nY"}, sequential_responses=True)`
			`criteria_evaluator = get_criteria_evaluator(eval_llm, criteria="conciseness")`
			`res = criteria_evaluator.evaluate_run(run, example)`
			`assert res.value == "Y"`
			`assert res.score == 1`