langchain/tests/unit_tests/client/test_runner_utils.py

"""Test the LangChain+ client."""
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from unittest import mock

import pytest
from langchainplus_sdk.client import LangChainPlusClient
from langchainplus_sdk.schemas import Dataset, Example

from langchain.base_language import BaseLanguageModel
from langchain.chains.base import Chain
from langchain.client.runner_utils import (
    InputFormatError,
    _get_messages,
    _get_prompts,
    arun_on_dataset,
    run_llm,
)
from tests.unit_tests.llms.fake_chat_model import FakeChatModel
from tests.unit_tests.llms.fake_llm import FakeLLM

_CREATED_AT = datetime(2015, 1, 1, 0, 0, 0)
_TENANT_ID = "7a3d2b56-cd5b-44e5-846f-7eb6e8144ce4"
_EXAMPLE_MESSAGE = {
    "data": {"content": "Foo", "example": False, "additional_kwargs": {}},
    "type": "human",
}
_VALID_MESSAGES = [
    {"messages": [_EXAMPLE_MESSAGE], "other_key": "value"},
    {"messages": [], "other_key": "value"},
    {
        "messages": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], [_EXAMPLE_MESSAGE]],
        "other_key": "value",
    },
    {"any_key": [_EXAMPLE_MESSAGE]},
    {"any_key": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], [_EXAMPLE_MESSAGE]]},
]
_VALID_PROMPTS = [
    {"prompts": ["foo", "bar", "baz"], "other_key": "value"},
    {"prompt": "foo", "other_key": ["bar", "baz"]},
    {"some_key": "foo"},
    {"some_key": ["foo", "bar"]},
]


@pytest.mark.parametrize(
    "inputs",
    _VALID_MESSAGES,
)
def test__get_messages_valid(inputs: Dict[str, Any]) -> None:
    {"messages": []}
    _get_messages(inputs)


@pytest.mark.parametrize(
    "inputs",
    _VALID_PROMPTS,
)
def test__get_prompts_valid(inputs: Dict[str, Any]) -> None:
    _get_prompts(inputs)


@pytest.mark.parametrize(
    "inputs",
    [
        {"prompts": "foo"},
        {"prompt": ["foo"]},
        {"some_key": 3},
        {"some_key": "foo", "other_key": "bar"},
    ],
)
def test__get_prompts_invalid(inputs: Dict[str, Any]) -> None:
    with pytest.raises(InputFormatError):
        _get_prompts(inputs)


@pytest.mark.parametrize(
    "inputs",
    [
        {"one_key": [_EXAMPLE_MESSAGE], "other_key": "value"},
        {
            "messages": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], _EXAMPLE_MESSAGE],
            "other_key": "value",
        },
        {"prompts": "foo"},
        {},
    ],
)
def test__get_messages_invalid(inputs: Dict[str, Any]) -> None:
    with pytest.raises(InputFormatError):
        _get_messages(inputs)


@pytest.mark.parametrize("inputs", _VALID_PROMPTS + _VALID_MESSAGES)
def test_run_llm_all_formats(inputs: Dict[str, Any]) -> None:
    llm = FakeLLM()
    run_llm(llm, inputs, mock.MagicMock())


@pytest.mark.parametrize("inputs", _VALID_MESSAGES + _VALID_PROMPTS)
def test_run_chat_model_all_formats(inputs: Dict[str, Any]) -> None:
    llm = FakeChatModel()
    run_llm(llm, inputs, mock.MagicMock())


@pytest.mark.asyncio
async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
    dataset = Dataset(
        id=uuid.uuid4(),
        name="test",
        description="Test dataset",
        owner_id="owner",
        created_at=_CREATED_AT,
        tenant_id=_TENANT_ID,
    )
    uuids = [
        "0c193153-2309-4704-9a47-17aee4fb25c8",
        "0d11b5fd-8e66-4485-b696-4b55155c0c05",
        "90d696f0-f10d-4fd0-b88b-bfee6df08b84",
        "4ce2c6d8-5124-4c0c-8292-db7bdebcf167",
        "7b5a524c-80fa-4960-888e-7d380f9a11ee",
    ]
    examples = [
        Example(
            id=uuids[0],
            created_at=_CREATED_AT,
            inputs={"input": "1"},
            outputs={"output": "2"},
            dataset_id=str(uuid.uuid4()),
        ),
        Example(
            id=uuids[1],
            created_at=_CREATED_AT,
            inputs={"input": "3"},
            outputs={"output": "4"},
            dataset_id=str(uuid.uuid4()),
        ),
        Example(
            id=uuids[2],
            created_at=_CREATED_AT,
            inputs={"input": "5"},
            outputs={"output": "6"},
            dataset_id=str(uuid.uuid4()),
        ),
        Example(
            id=uuids[3],
            created_at=_CREATED_AT,
            inputs={"input": "7"},
            outputs={"output": "8"},
            dataset_id=str(uuid.uuid4()),
        ),
        Example(
            id=uuids[4],
            created_at=_CREATED_AT,
            inputs={"input": "9"},
            outputs={"output": "10"},
            dataset_id=str(uuid.uuid4()),
        ),
    ]

    def mock_read_dataset(*args: Any, **kwargs: Any) -> Dataset:
        return dataset

    def mock_list_examples(*args: Any, **kwargs: Any) -> List[Example]:
        return examples

    async def mock_arun_chain(
        example: Example,
        llm_or_chain: Union[BaseLanguageModel, Chain],
        n_repetitions: int,
        tracer: Any,
        tags: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        return [
            {"result": f"Result for example {example.id}"} for _ in range(n_repetitions)
        ]

    def mock_create_session(*args: Any, **kwargs: Any) -> None:
        pass

    with mock.patch.object(
        LangChainPlusClient, "read_dataset", new=mock_read_dataset
    ), mock.patch.object(
        LangChainPlusClient, "list_examples", new=mock_list_examples
    ), mock.patch(
        "langchain.client.runner_utils._arun_llm_or_chain", new=mock_arun_chain
    ), mock.patch.object(
        LangChainPlusClient, "create_session", new=mock_create_session
    ):
        client = LangChainPlusClient(api_url="http://localhost:1984", api_key="123")
        chain = mock.MagicMock()
        num_repetitions = 3
        results = await arun_on_dataset(
            dataset_name="test",
            llm_or_chain_factory=lambda: chain,
            concurrency_level=2,
            session_name="test_session",
            num_repetitions=num_repetitions,
            client=client,
        )

        expected = {
            uuid_: [
                {"result": f"Result for example {uuid.UUID(uuid_)}"}
                for _ in range(num_repetitions)
            ]
            for uuid_ in uuids
        }
        assert results["results"] == expected
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00			`"""Test the LangChain+ client."""`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`import uuid`
			`from datetime import datetime`
Add Tags for LLMs (#6229) - [x] Add tracing tags to LLMs + Chat Models (both inheritable and local) - [x] Add tags for the run_on_dataset helper function(s) 2023-06-15 18:24:11 +00:00			`from typing import Any, Dict, List, Optional, Union`
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00			`from unittest import mock`

			`import pytest`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`from langchainplus_sdk.client import LangChainPlusClient`
			`from langchainplus_sdk.schemas import Dataset, Example`
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`from langchain.base_language import BaseLanguageModel`
			`from langchain.chains.base import Chain`
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00			`from langchain.client.runner_utils import (`
			`InputFormatError,`
			`_get_messages,`
			`_get_prompts,`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`arun_on_dataset,`
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00			`run_llm,`
			`)`
			`from tests.unit_tests.llms.fake_chat_model import FakeChatModel`
			`from tests.unit_tests.llms.fake_llm import FakeLLM`

Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`_CREATED_AT = datetime(2015, 1, 1, 0, 0, 0)`
			`_TENANT_ID = "7a3d2b56-cd5b-44e5-846f-7eb6e8144ce4"`
Separate Runner Functions from Client (#5079) Extract the methods specific to running an LLM or Chain on a dataset to separate utility functions. This simplifies the client a bit and lets us separate concerns of LCP details from running examples (e.g., for evals) 2023-05-22 05:28:47 +00:00			`_EXAMPLE_MESSAGE = {`
			`"data": {"content": "Foo", "example": False, "additional_kwargs": {}},`
			`"type": "human",`
			`}`
			`_VALID_MESSAGES = [`
			`{"messages": [_EXAMPLE_MESSAGE], "other_key": "value"},`
			`{"messages": [], "other_key": "value"},`
			`{`
			`"messages": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], [_EXAMPLE_MESSAGE]],`
			`"other_key": "value",`
			`},`
			`{"any_key": [_EXAMPLE_MESSAGE]},`
			`{"any_key": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], [_EXAMPLE_MESSAGE]]},`
			`]`
			`_VALID_PROMPTS = [`
			`{"prompts": ["foo", "bar", "baz"], "other_key": "value"},`
			`{"prompt": "foo", "other_key": ["bar", "baz"]},`
			`{"some_key": "foo"},`
			`{"some_key": ["foo", "bar"]},`
			`]`


			`@pytest.mark.parametrize(`
			`"inputs",`
			`_VALID_MESSAGES,`
			`)`
			`def test__get_messages_valid(inputs: Dict[str, Any]) -> None:`
			`{"messages": []}`
			`_get_messages(inputs)`


			`@pytest.mark.parametrize(`
			`"inputs",`
			`_VALID_PROMPTS,`
			`)`
			`def test__get_prompts_valid(inputs: Dict[str, Any]) -> None:`
			`_get_prompts(inputs)`


			`@pytest.mark.parametrize(`
			`"inputs",`
			`[`
			`{"prompts": "foo"},`
			`{"prompt": ["foo"]},`
			`{"some_key": 3},`
			`{"some_key": "foo", "other_key": "bar"},`
			`],`
			`)`
			`def test__get_prompts_invalid(inputs: Dict[str, Any]) -> None:`
			`with pytest.raises(InputFormatError):`
			`_get_prompts(inputs)`


			`@pytest.mark.parametrize(`
			`"inputs",`
			`[`
			`{"one_key": [_EXAMPLE_MESSAGE], "other_key": "value"},`
			`{`
			`"messages": [[_EXAMPLE_MESSAGE, _EXAMPLE_MESSAGE], _EXAMPLE_MESSAGE],`
			`"other_key": "value",`
			`},`
			`{"prompts": "foo"},`
			`{},`
			`],`
			`)`
			`def test__get_messages_invalid(inputs: Dict[str, Any]) -> None:`
			`with pytest.raises(InputFormatError):`
			`_get_messages(inputs)`


			`@pytest.mark.parametrize("inputs", _VALID_PROMPTS + _VALID_MESSAGES)`
			`def test_run_llm_all_formats(inputs: Dict[str, Any]) -> None:`
			`llm = FakeLLM()`
			`run_llm(llm, inputs, mock.MagicMock())`


			`@pytest.mark.parametrize("inputs", _VALID_MESSAGES + _VALID_PROMPTS)`
			`def test_run_chat_model_all_formats(inputs: Dict[str, Any]) -> None:`
			`llm = FakeChatModel()`
			`run_llm(llm, inputs, mock.MagicMock())`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00

			`@pytest.mark.asyncio`
			`async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:`
			`dataset = Dataset(`
			`id=uuid.uuid4(),`
			`name="test",`
			`description="Test dataset",`
			`owner_id="owner",`
			`created_at=_CREATED_AT,`
			`tenant_id=_TENANT_ID,`
			`)`
			`uuids = [`
			`"0c193153-2309-4704-9a47-17aee4fb25c8",`
			`"0d11b5fd-8e66-4485-b696-4b55155c0c05",`
			`"90d696f0-f10d-4fd0-b88b-bfee6df08b84",`
			`"4ce2c6d8-5124-4c0c-8292-db7bdebcf167",`
			`"7b5a524c-80fa-4960-888e-7d380f9a11ee",`
			`]`
			`examples = [`
			`Example(`
			`id=uuids[0],`
			`created_at=_CREATED_AT,`
			`inputs={"input": "1"},`
			`outputs={"output": "2"},`
			`dataset_id=str(uuid.uuid4()),`
			`),`
			`Example(`
			`id=uuids[1],`
			`created_at=_CREATED_AT,`
			`inputs={"input": "3"},`
			`outputs={"output": "4"},`
			`dataset_id=str(uuid.uuid4()),`
			`),`
			`Example(`
			`id=uuids[2],`
			`created_at=_CREATED_AT,`
			`inputs={"input": "5"},`
			`outputs={"output": "6"},`
			`dataset_id=str(uuid.uuid4()),`
			`),`
			`Example(`
			`id=uuids[3],`
			`created_at=_CREATED_AT,`
			`inputs={"input": "7"},`
			`outputs={"output": "8"},`
			`dataset_id=str(uuid.uuid4()),`
			`),`
			`Example(`
			`id=uuids[4],`
			`created_at=_CREATED_AT,`
			`inputs={"input": "9"},`
			`outputs={"output": "10"},`
			`dataset_id=str(uuid.uuid4()),`
			`),`
			`]`

			`def mock_read_dataset(args: Any, *kwargs: Any) -> Dataset:`
			`return dataset`

			`def mock_list_examples(args: Any, *kwargs: Any) -> List[Example]:`
			`return examples`

			`async def mock_arun_chain(`
			`example: Example,`
			`llm_or_chain: Union[BaseLanguageModel, Chain],`
			`n_repetitions: int,`
			`tracer: Any,`
Add Tags for LLMs (#6229) - [x] Add tracing tags to LLMs + Chat Models (both inheritable and local) - [x] Add tags for the run_on_dataset helper function(s) 2023-06-15 18:24:11 +00:00			`tags: Optional[List[str]] = None,`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`) -> List[Dict[str, Any]]:`
			`return [`
			`{"result": f"Result for example {example.id}"} for _ in range(n_repetitions)`
			`]`

Run eval in eval mode (#6447) For the `run_on_dataset` sessions 2023-06-20 01:31:38 +00:00			`def mock_create_session(args: Any, *kwargs: Any) -> None:`
			`pass`

Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`with mock.patch.object(`
			`LangChainPlusClient, "read_dataset", new=mock_read_dataset`
			`), mock.patch.object(`
			`LangChainPlusClient, "list_examples", new=mock_list_examples`
			`), mock.patch(`
			`"langchain.client.runner_utils._arun_llm_or_chain", new=mock_arun_chain`
Run eval in eval mode (#6447) For the `run_on_dataset` sessions 2023-06-20 01:31:38 +00:00			`), mock.patch.object(`
			`LangChainPlusClient, "create_session", new=mock_create_session`
Use client from LCP-SDK (#5695) - Remove the client implementation (this breaks backwards compatibility for existing testers. I could keep the stub in that file if we want, but not many people are using it yet - Add SDK as dependency - Update the 'run_on_dataset' method to be a function that optionally accepts a client as an argument - Remove the langchain plus server implementation (you get it for free with the SDK now) We could make the SDK optional for now, but the plan is to use w/in the tracer so it would likely become a hard dependency at some point. 2023-06-06 13:51:05 +00:00			`):`
			`client = LangChainPlusClient(api_url="http://localhost:1984", api_key="123")`
			`chain = mock.MagicMock()`
			`num_repetitions = 3`
			`results = await arun_on_dataset(`
			`dataset_name="test",`
			`llm_or_chain_factory=lambda: chain,`
			`concurrency_level=2,`
			`session_name="test_session",`
			`num_repetitions=num_repetitions,`
			`client=client,`
			`)`

			`expected = {`
			`uuid_: [`
			`{"result": f"Result for example {uuid.UUID(uuid_)}"}`
			`for _ in range(num_repetitions)`
			`]`
			`for uuid_ in uuids`
			`}`
Return session name in runner response (#6112) Makes it easier to then run evals w/o thinking about specifying a session 2023-06-13 23:59:43 +00:00			`assert results["results"] == expected`