Mirror of https://github.com/hwchase17/langchain (synced 2024-11-06 03:20:49 +00:00)
# Add caching to BaseChatModel

Fixes #1644

(Sidenote: While testing, I noticed we have multiple implementations of fake LLMs used for testing. I consolidated them.)

## Who can review?

Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested:

Models
- @hwchase17
- @agola11

Twitter: [@UmerHAdil](https://twitter.com/@UmerHAdil) | Discord: RicChilligerDude#7589

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
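For reviewers, a minimal usage sketch of what this change enables, drawn from the docs added below (assumes an OpenAI API key is configured in the environment):

```python
import langchain
from langchain.cache import InMemoryCache
from langchain.chat_models import ChatOpenAI

# Enable the global LLM cache; chat model calls now consult it first.
langchain.llm_cache = InMemoryCache()

llm = ChatOpenAI()
llm.predict("Tell me a joke")  # first call hits the OpenAI API
llm.predict("Tell me a joke")  # repeated call is answered from the cache
```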
This commit is contained in:
parent c289cc891a
commit 068142fce2
@ -0,0 +1,9 @@
# Caching

LangChain provides an optional caching layer for Chat Models. This is useful for two reasons:

It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.

It can speed up your application by reducing the number of API calls you make to the LLM provider.

import CachingChat from "@snippets/modules/model_io/models/chat/how_to/chat_model_caching.mdx"

<CachingChat/>
@ -0,0 +1,97 @@

```python
import langchain
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()
```

## In Memory Cache

```python
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()

# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 35.9 ms, sys: 28.6 ms, total: 64.6 ms
Wall time: 4.83 s

"\n\nWhy couldn't the bicycle stand up by itself? It was...two tired!"
```

</CodeOutputBlock>

```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 238 µs, sys: 143 µs, total: 381 µs
Wall time: 1.76 ms

'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>

## SQLite Cache

```bash
rm .langchain.db
```

```python
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")
```

```python
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 17 ms, sys: 9.76 ms, total: 26.7 ms
Wall time: 825 ms

'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>

```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 2.46 ms, sys: 1.23 ms, total: 3.7 ms
Wall time: 2.67 ms

'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>
@ -14,7 +14,7 @@ from langchain.cache import InMemoryCache
 langchain.llm_cache = InMemoryCache()
 
 # The first time, it is not yet in cache, so it should take longer
-llm("Tell me a joke")
+llm.predict("Tell me a joke")
 ```
 
 <CodeOutputBlock lang="python">
@ -32,7 +32,7 @@ llm("Tell me a joke")
 
 ```python
 # The second time it is, so it goes faster
-llm("Tell me a joke")
+llm.predict("Tell me a joke")
 ```
 
 <CodeOutputBlock lang="python">
@ -64,7 +64,7 @@ langchain.llm_cache = SQLiteCache(database_path=".langchain.db")
 
 ```python
 # The first time, it is not yet in cache, so it should take longer
-llm("Tell me a joke")
+llm.predict("Tell me a joke")
 ```
 
 <CodeOutputBlock lang="python">
@ -82,7 +82,7 @@ llm("Tell me a joke")
 
 ```python
 # The second time it is, so it goes faster
-llm("Tell me a joke")
+llm.predict("Tell me a joke")
 ```
 
 <CodeOutputBlock lang="python">
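For context on the `llm(...)` → `llm.predict(...)` change above: with a chat model, `__call__` takes a list of messages, while `predict` is the string-in/string-out convenience method. A minimal sketch (assuming `ChatOpenAI` and a configured API key):

```python
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

llm = ChatOpenAI()

llm.predict("Tell me a joke")                  # str in, str out
llm([HumanMessage(content="Tell me a joke")])  # messages in, AIMessage out
```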
@ -4,6 +4,7 @@ from __future__ import annotations
 import hashlib
 import inspect
 import json
+import logging
 from abc import ABC, abstractmethod
 from datetime import timedelta
 from typing import (
@ -11,8 +12,8 @@ from typing import (
     Any,
     Callable,
     Dict,
-    List,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
@ -31,13 +32,17 @@ except ImportError:
     from sqlalchemy.ext.declarative import declarative_base
 
 from langchain.embeddings.base import Embeddings
+from langchain.load.dump import dumps
+from langchain.load.load import loads
 from langchain.schema import Generation
 from langchain.vectorstores.redis import Redis as RedisVectorstore
 
+logger = logging.getLogger(__file__)
+
 if TYPE_CHECKING:
     import momento
 
-RETURN_VAL_TYPE = List[Generation]
+RETURN_VAL_TYPE = Sequence[Generation]
 
 
 def _hash(_input: str) -> str:
@ -147,13 +152,24 @@ class SQLAlchemyCache(BaseCache):
         with Session(self.engine) as session:
             rows = session.execute(stmt).fetchall()
             if rows:
-                return [Generation(text=row[0]) for row in rows]
+                try:
+                    return [loads(row[0]) for row in rows]
+                except Exception:
+                    logger.warning(
+                        "Retrieving a cache value that could not be deserialized "
+                        "properly. This is likely due to the cache being in an "
+                        "older format. Please recreate your cache to avoid this "
+                        "error."
+                    )
+                    # In a previous life we stored the raw text directly
+                    # in the table, so assume it's in that format.
+                    return [Generation(text=row[0]) for row in rows]
         return None
 
     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
         """Update based on prompt and llm_string."""
         items = [
-            self.cache_schema(prompt=prompt, llm=llm_string, response=gen.text, idx=i)
+            self.cache_schema(prompt=prompt, llm=llm_string, response=dumps(gen), idx=i)
             for i, gen in enumerate(return_val)
         ]
         with Session(self.engine) as session, session.begin():
@ -163,7 +179,7 @@ class SQLAlchemyCache(BaseCache):
     def clear(self, **kwargs: Any) -> None:
         """Clear cache."""
         with Session(self.engine) as session:
-            session.execute(self.cache_schema.delete())
+            session.query(self.cache_schema).delete()
 
 
 class SQLiteCache(SQLAlchemyCache):
@ -209,6 +225,12 @@ class RedisCache(BaseCache):
 
     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
         """Update cache based on prompt and llm_string."""
+        for gen in return_val:
+            if not isinstance(return_val, Generation):
+                raise ValueError(
+                    "RedisCache only supports caching of normal LLM generations, "
+                    f"got {type(gen)}"
+                )
         # Write to a Redis HASH
         key = self._key(prompt, llm_string)
         self.redis.hset(
@ -314,6 +336,12 @@ class RedisSemanticCache(BaseCache):
 
     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
         """Update cache based on prompt and llm_string."""
+        for gen in return_val:
+            if not isinstance(return_val, Generation):
+                raise ValueError(
+                    "RedisSemanticCache only supports caching of "
+                    f"normal LLM generations, got {type(gen)}"
+                )
         llm_cache = self._get_llm_cache(llm_string)
         # Write to vectorstore
         metadata = {
@ -426,6 +454,12 @@ class GPTCache(BaseCache):
         First, retrieve the corresponding cache object using the `llm_string` parameter,
         and then store the `prompt` and `return_val` in the cache object.
         """
+        for gen in return_val:
+            if not isinstance(return_val, Generation):
+                raise ValueError(
+                    "GPTCache only supports caching of normal LLM generations, "
+                    f"got {type(gen)}"
+                )
         from gptcache.adapter.api import put
 
         _gptcache = self._get_gptcache(llm_string)
@ -567,7 +601,7 @@ class MomentoCache(BaseCache):
         """
         from momento.responses import CacheGet
 
-        generations = []
+        generations: RETURN_VAL_TYPE = []
 
         get_response = self.cache_client.get(
             self.cache_name, self.__key(prompt, llm_string)
@ -593,6 +627,12 @@ class MomentoCache(BaseCache):
             SdkException: Momento service or network error
             Exception: Unexpected response
         """
+        for gen in return_val:
+            if not isinstance(return_val, Generation):
+                raise ValueError(
+                    "Momento only supports caching of normal LLM generations, "
+                    f"got {type(gen)}"
+                )
         key = self.__key(prompt, llm_string)
         value = _dump_generations_to_json(return_val)
         set_response = self.cache_client.set(self.cache_name, key, value, self.ttl)
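The `SQLAlchemyCache` change above stores each `Generation` serialized with `dumps()` and reads it back with `loads()`, falling back to the old plain-text rows for caches written before this PR. A minimal round-trip sketch of that serialization (illustrative only):

```python
from langchain.load.dump import dumps
from langchain.load.load import loads
from langchain.schema import Generation

gen = Generation(text="Hello")
serialized = dumps(gen)       # JSON string, as now stored in the cache table
restored = loads(serialized)  # reconstructed Generation object
print(restored.text)          # -> "Hello"
```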
@ -1,5 +1,6 @@
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.chat_models.azure_openai import AzureChatOpenAI
+from langchain.chat_models.fake import FakeListChatModel
 from langchain.chat_models.google_palm import ChatGooglePalm
 from langchain.chat_models.openai import ChatOpenAI
 from langchain.chat_models.promptlayer_openai import PromptLayerChatOpenAI
@ -8,6 +9,7 @@ from langchain.chat_models.vertexai import ChatVertexAI
 __all__ = [
     "ChatOpenAI",
     "AzureChatOpenAI",
+    "FakeListChatModel",
     "PromptLayerChatOpenAI",
     "ChatAnthropic",
     "ChatGooglePalm",
@ -17,7 +17,7 @@ from langchain.callbacks.manager import (
     CallbackManagerForLLMRun,
     Callbacks,
 )
-from langchain.load.dump import dumpd
+from langchain.load.dump import dumpd, dumps
 from langchain.schema import (
     AIMessage,
     BaseMessage,
@ -35,6 +35,7 @@ def _get_verbosity() -> bool:
 
 
 class BaseChatModel(BaseLanguageModel, ABC):
+    cache: Optional[bool] = None
     verbose: bool = Field(default_factory=_get_verbosity)
     """Whether to print out response text."""
     callbacks: Callbacks = Field(default=None, exclude=True)
@ -61,6 +62,25 @@ class BaseChatModel(BaseLanguageModel, ABC):
     def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
         return {}
 
+    def _get_invocation_params(
+        self,
+        stop: Optional[List[str]] = None,
+    ) -> dict:
+        params = self.dict()
+        params["stop"] = stop
+        return params
+
+    def _get_llm_string(self, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
+        if self.lc_serializable:
+            params = {**kwargs, **{"stop": stop}}
+            param_string = str(sorted([(k, v) for k, v in params.items()]))
+            llm_string = dumps(self)
+            return llm_string + "---" + param_string
+        else:
+            params = self._get_invocation_params(stop=stop)
+            params = {**params, **kwargs}
+            return str(sorted([(k, v) for k, v in params.items()]))
+
     def generate(
         self,
         messages: List[List[BaseMessage]],
@ -71,9 +91,7 @@ class BaseChatModel(BaseLanguageModel, ABC):
         **kwargs: Any,
     ) -> LLMResult:
         """Top Level call"""
-        params = self.dict()
-        params["stop"] = stop
+        params = self._get_invocation_params(stop=stop)
         options = {"stop": stop}
 
         callback_manager = CallbackManager.configure(
@ -87,14 +105,11 @@ class BaseChatModel(BaseLanguageModel, ABC):
             dumpd(self), messages, invocation_params=params, options=options
         )
 
-        new_arg_supported = inspect.signature(self._generate).parameters.get(
-            "run_manager"
-        )
         try:
             results = [
-                self._generate(m, stop=stop, run_manager=run_manager, **kwargs)
-                if new_arg_supported
-                else self._generate(m, stop=stop)
+                self._generate_with_cache(
+                    m, stop=stop, run_manager=run_manager, **kwargs
+                )
                 for m in messages
             ]
         except (KeyboardInterrupt, Exception) as e:
@ -118,8 +133,7 @@ class BaseChatModel(BaseLanguageModel, ABC):
         **kwargs: Any,
     ) -> LLMResult:
         """Top Level call"""
-        params = self.dict()
-        params["stop"] = stop
+        params = self._get_invocation_params(stop=stop)
         options = {"stop": stop}
 
         callback_manager = AsyncCallbackManager.configure(
@ -133,15 +147,12 @@ class BaseChatModel(BaseLanguageModel, ABC):
             dumpd(self), messages, invocation_params=params, options=options
         )
 
-        new_arg_supported = inspect.signature(self._agenerate).parameters.get(
-            "run_manager"
-        )
         try:
             results = await asyncio.gather(
                 *[
-                    self._agenerate(m, stop=stop, run_manager=run_manager, **kwargs)
-                    if new_arg_supported
-                    else self._agenerate(m, stop=stop)
+                    self._agenerate_with_cache(
+                        m, stop=stop, run_manager=run_manager, **kwargs
+                    )
                     for m in messages
                 ]
             )
@ -178,6 +189,84 @@ class BaseChatModel(BaseLanguageModel, ABC):
             prompt_messages, stop=stop, callbacks=callbacks, **kwargs
         )
 
+    def _generate_with_cache(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        new_arg_supported = inspect.signature(self._generate).parameters.get(
+            "run_manager"
+        )
+        disregard_cache = self.cache is not None and not self.cache
+        if langchain.llm_cache is None or disregard_cache:
+            # This happens when langchain.cache is None, but self.cache is True
+            if self.cache is not None and self.cache:
+                raise ValueError(
+                    "Asked to cache, but no cache found at `langchain.cache`."
+                )
+            if new_arg_supported:
+                return self._generate(
+                    messages, stop=stop, run_manager=run_manager, **kwargs
+                )
+            else:
+                return self._generate(messages, stop=stop, **kwargs)
+        else:
+            llm_string = self._get_llm_string(stop=stop, **kwargs)
+            prompt = dumps(messages)
+            cache_val = langchain.llm_cache.lookup(prompt, llm_string)
+            if isinstance(cache_val, list):
+                return ChatResult(generations=cache_val)
+            else:
+                if new_arg_supported:
+                    result = self._generate(
+                        messages, stop=stop, run_manager=run_manager, **kwargs
+                    )
+                else:
+                    result = self._generate(messages, stop=stop, **kwargs)
+                langchain.llm_cache.update(prompt, llm_string, result.generations)
+                return result
+
+    async def _agenerate_with_cache(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        new_arg_supported = inspect.signature(self._agenerate).parameters.get(
+            "run_manager"
+        )
+        disregard_cache = self.cache is not None and not self.cache
+        if langchain.llm_cache is None or disregard_cache:
+            # This happens when langchain.cache is None, but self.cache is True
+            if self.cache is not None and self.cache:
+                raise ValueError(
+                    "Asked to cache, but no cache found at `langchain.cache`."
+                )
+            if new_arg_supported:
+                return await self._agenerate(
+                    messages, stop=stop, run_manager=run_manager, **kwargs
+                )
+            else:
+                return await self._agenerate(messages, stop=stop, **kwargs)
+        else:
+            llm_string = self._get_llm_string(stop=stop, **kwargs)
+            prompt = dumps(messages)
+            cache_val = langchain.llm_cache.lookup(prompt, llm_string)
+            if isinstance(cache_val, list):
+                return ChatResult(generations=cache_val)
+            else:
+                if new_arg_supported:
+                    result = await self._agenerate(
+                        messages, stop=stop, run_manager=run_manager, **kwargs
+                    )
+                else:
+                    result = await self._agenerate(messages, stop=stop, **kwargs)
+                langchain.llm_cache.update(prompt, llm_string, result.generations)
+                return result
+
     @abstractmethod
     def _generate(
         self,
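A minimal sketch of how the new `cache` field on `BaseChatModel` interacts with the global cache, based on the `_generate_with_cache` logic above (assumes `ChatOpenAI`):

```python
import langchain
from langchain.cache import InMemoryCache
from langchain.chat_models import ChatOpenAI

langchain.llm_cache = InMemoryCache()

cached_llm = ChatOpenAI()               # cache=None -> uses the global cache
uncached_llm = ChatOpenAI(cache=False)  # disregards the global cache entirely

# If cache=True while langchain.llm_cache is None, generate() raises:
# ValueError("Asked to cache, but no cache found at `langchain.cache`.")
```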
langchain/chat_models/fake.py (new file, 33 lines)
@ -0,0 +1,33 @@
"""Fake ChatModel for testing purposes."""
from typing import Any, List, Mapping, Optional

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.chat_models.base import SimpleChatModel
from langchain.schema import BaseMessage


class FakeListChatModel(SimpleChatModel):
    """Fake ChatModel for testing purposes."""

    responses: List
    i: int = 0

    @property
    def _llm_type(self) -> str:
        return "fake-list-chat-model"

    def _call(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """First try to lookup in queries, else return 'foo' or 'bar'."""
        response = self.responses[self.i]
        self.i += 1
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"responses": self.responses}
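A short sketch of how the new `FakeListChatModel` is used in the tests elsewhere in this PR (responses are returned in order, one per call):

```python
from langchain.chat_models import FakeListChatModel
from langchain.schema import HumanMessage

llm = FakeListChatModel(responses=["first", "second"])

llm([HumanMessage(content="hi")])  # -> AIMessage(content="first")
llm([HumanMessage(content="hi")])  # -> AIMessage(content="second")
```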
@ -76,6 +76,11 @@ class Generation(Serializable):
     """May include things like reason for finishing (e.g. in OpenAI)"""
     # TODO: add log probs
 
+    @property
+    def lc_serializable(self) -> bool:
+        """This class is LangChain serializable."""
+        return True
+
 
 class BaseMessage(Serializable):
     """Message object."""
@ -88,6 +93,11 @@ class BaseMessage(Serializable):
     def type(self) -> str:
         """Type of the message, used for serialization."""
 
+    @property
+    def lc_serializable(self) -> bool:
+        """This class is LangChain serializable."""
+        return True
+
 
 class HumanMessage(BaseMessage):
     """Type of message that is spoken by the human."""
@ -1,13 +1,12 @@
 """Unit tests for ReAct."""
 
-from typing import Any, List, Mapping, Optional, Union
+from typing import Union
 
 from langchain.agents.react.base import ReActChain, ReActDocstoreAgent
 from langchain.agents.tools import Tool
-from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.docstore.base import Docstore
 from langchain.docstore.document import Document
-from langchain.llms.base import LLM
+from langchain.llms.fake import FakeListLLM
 from langchain.prompts.prompt import PromptTemplate
 from langchain.schema import AgentAction
 
@ -22,33 +21,6 @@ Made in 2022."""
 _FAKE_PROMPT = PromptTemplate(input_variables=["input"], template="{input}")
 
 
-class FakeListLLM(LLM):
-    """Fake LLM for testing that outputs elements of a list."""
-
-    responses: List[str]
-    i: int = -1
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "fake_list"
-
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        """Increment counter, and then return response in that index."""
-        self.i += 1
-        return self.responses[self.i]
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        return {}
-
-
 class FakeDocstore(Docstore):
     """Fake docstore for testing purposes."""
 
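For reference, the consolidated `FakeListLLM` (now imported from `langchain.llms.fake`) behaves the same way as the class removed above; a short usage sketch:

```python
from langchain.llms.fake import FakeListLLM

llm = FakeListLLM(responses=["foo", "bar"])

llm("any prompt")      # -> "foo"
llm("another prompt")  # -> "bar"
```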
@ -1,17 +1,17 @@
 """Test LLM callbacks."""
+from langchain.chat_models.fake import FakeListChatModel
+from langchain.llms.fake import FakeListLLM
 from langchain.schema import HumanMessage
 from tests.unit_tests.callbacks.fake_callback_handler import (
     FakeCallbackHandler,
     FakeCallbackHandlerWithChatStart,
 )
-from tests.unit_tests.llms.fake_chat_model import FakeChatModel
-from tests.unit_tests.llms.fake_llm import FakeLLM
 
 
 def test_llm_with_callbacks() -> None:
     """Test LLM callbacks."""
     handler = FakeCallbackHandler()
-    llm = FakeLLM(callbacks=[handler], verbose=True)
+    llm = FakeListLLM(callbacks=[handler], verbose=True, responses=["foo"])
     output = llm("foo")
     assert output == "foo"
     assert handler.starts == 1
@ -22,7 +22,9 @@ def test_llm_with_callbacks() -> None:
 def test_chat_model_with_v1_callbacks() -> None:
     """Test chat model callbacks fall back to on_llm_start."""
     handler = FakeCallbackHandler()
-    llm = FakeChatModel(callbacks=[handler], verbose=True)
+    llm = FakeListChatModel(
+        callbacks=[handler], verbose=True, responses=["fake response"]
+    )
     output = llm([HumanMessage(content="foo")])
     assert output.content == "fake response"
     assert handler.starts == 1
@ -35,7 +37,9 @@ def test_chat_model_with_v1_callbacks() -> None:
 def test_chat_model_with_v2_callbacks() -> None:
     """Test chat model callbacks fall back to on_llm_start."""
     handler = FakeCallbackHandlerWithChatStart()
-    llm = FakeChatModel(callbacks=[handler], verbose=True)
+    llm = FakeListChatModel(
+        callbacks=[handler], verbose=True, responses=["fake response"]
+    )
     output = llm([HumanMessage(content="foo")])
     assert output.content == "fake response"
     assert handler.starts == 1
tests/unit_tests/test_cache.py (new file, 146 lines)
@ -0,0 +1,146 @@
"""Test caching for LLMs and ChatModels."""
from typing import Dict, Generator, List, Union

import pytest
from _pytest.fixtures import FixtureRequest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

import langchain
from langchain.cache import (
    InMemoryCache,
    SQLAlchemyCache,
)
from langchain.chat_models import FakeListChatModel
from langchain.chat_models.base import BaseChatModel, dumps
from langchain.llms import FakeListLLM
from langchain.llms.base import BaseLLM
from langchain.schema import (
    AIMessage,
    BaseMessage,
    ChatGeneration,
    Generation,
    HumanMessage,
)


def get_sqlite_cache() -> SQLAlchemyCache:
    return SQLAlchemyCache(engine=create_engine("sqlite://"))


CACHE_OPTIONS = [
    InMemoryCache,
    get_sqlite_cache,
]


@pytest.fixture(autouse=True, params=CACHE_OPTIONS)
def set_cache_and_teardown(request: FixtureRequest) -> Generator[None, None, None]:
    # Will be run before each test
    cache_instance = request.param
    langchain.llm_cache = cache_instance()
    if langchain.llm_cache:
        langchain.llm_cache.clear()
    else:
        raise ValueError("Cache not set. This should never happen.")

    yield

    # Will be run after each test
    if langchain.llm_cache:
        langchain.llm_cache.clear()
    else:
        raise ValueError("Cache not set. This should never happen.")


def test_llm_caching() -> None:
    prompt = "How are you?"
    response = "Test response"
    cached_response = "Cached test response"
    llm = FakeListLLM(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=prompt,
            llm_string=create_llm_string(llm),
            return_val=[Generation(text=cached_response)],
        )
        assert llm(prompt) == cached_response
    else:
        raise ValueError(
            "The cache not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def test_old_sqlite_llm_caching() -> None:
    if isinstance(langchain.llm_cache, SQLAlchemyCache):
        prompt = "How are you?"
        response = "Test response"
        cached_response = "Cached test response"
        llm = FakeListLLM(responses=[response])
        items = [
            langchain.llm_cache.cache_schema(
                prompt=prompt,
                llm=create_llm_string(llm),
                response=cached_response,
                idx=0,
            )
        ]
        with Session(langchain.llm_cache.engine) as session, session.begin():
            for item in items:
                session.merge(item)
        assert llm(prompt) == cached_response


def test_chat_model_caching() -> None:
    prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
    response = "Test response"
    cached_response = "Cached test response"
    cached_message = AIMessage(content=cached_response)
    llm = FakeListChatModel(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=dumps(prompt),
            llm_string=llm._get_llm_string(),
            return_val=[ChatGeneration(message=cached_message)],
        )
        result = llm(prompt)
        assert isinstance(result, AIMessage)
        assert result.content == cached_response
    else:
        raise ValueError(
            "The cache not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def test_chat_model_caching_params() -> None:
    prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
    response = "Test response"
    cached_response = "Cached test response"
    cached_message = AIMessage(content=cached_response)
    llm = FakeListChatModel(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=dumps(prompt),
            llm_string=llm._get_llm_string(functions=[]),
            return_val=[ChatGeneration(message=cached_message)],
        )
        result = llm(prompt, functions=[])
        assert isinstance(result, AIMessage)
        assert result.content == cached_response
        result_no_params = llm(prompt)
        assert isinstance(result_no_params, AIMessage)
        assert result_no_params.content == response

    else:
        raise ValueError(
            "The cache not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def create_llm_string(llm: Union[BaseLLM, BaseChatModel]) -> str:
    _dict: Dict = llm.dict()
    _dict["stop"] = None
    return str(sorted([(k, v) for k, v in _dict.items()]))