mirror of https://github.com/hwchase17/langchain
# Add caching to BaseChatModel

Fixes #1644

(Sidenote: While testing, I noticed we have multiple implementations of Fake LLMs, used for testing. I consolidated them.)

## Who can review?

Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested:

Models: @hwchase17, @agola11

Twitter: [@UmerHAdil](https://twitter.com/@UmerHAdil) | Discord: RicChilligerDude#7589

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>

pull/6688/head
parent c289cc891a
commit 068142fce2
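For context, the behaviour this PR adds can be pictured roughly as follows. This is only an illustrative sketch, not the actual `BaseChatModel.generate` code; it assumes the `BaseCache.lookup`/`update` interface and the `dumps`/`_get_llm_string` helpers that the new unit tests below rely on, and the helper name `generate_with_cache` is hypothetical.

```python
# Illustrative sketch only -- not the real BaseChatModel implementation.
# Idea: key the cache on the serialized messages plus a string identifying
# the model and its parameters; on a hit, skip the provider call entirely.
import langchain
from langchain.chat_models.base import dumps  # serializes a list of messages


def generate_with_cache(chat_model, messages):  # hypothetical helper
    llm_string = chat_model._get_llm_string()   # model + params as one part of the key
    prompt_key = dumps(messages)                # serialized messages as the other part
    if langchain.llm_cache is not None:
        cached = langchain.llm_cache.lookup(prompt_key, llm_string)
        if cached is not None:
            return cached                       # cached list of ChatGeneration objects
    result = chat_model._generate(messages)     # the actual provider call
    if langchain.llm_cache is not None:
        langchain.llm_cache.update(prompt_key, llm_string, result.generations)
    return result.generations
```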
@@ -0,0 +1,9 @@
# Caching

LangChain provides an optional caching layer for Chat Models. This is useful for two reasons:

- It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.
- It can speed up your application by reducing the number of API calls you make to the LLM provider.

import CachingChat from "@snippets/modules/model_io/models/chat/how_to/chat_model_caching.mdx"

<CachingChat/>
@@ -0,0 +1,97 @@
```python
import langchain
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()
```

## In Memory Cache

```python
%%time
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()

# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 35.9 ms, sys: 28.6 ms, total: 64.6 ms
Wall time: 4.83 s


"\n\nWhy couldn't the bicycle stand up by itself? It was...two tired!"
```

</CodeOutputBlock>

```python
%%time
# The second time it is already in the cache, so it goes faster
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 238 µs, sys: 143 µs, total: 381 µs
Wall time: 1.76 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>
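If you want to start from a clean slate (for example when comparing timings), the cache object exposes a `clear()` method, which the unit tests in this PR also use between runs:

```python
# Drop everything cached so far; the next identical call will hit the API again
langchain.llm_cache.clear()
```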
## SQLite Cache

```bash
# Start from an empty cache database
rm .langchain.db
```

```python
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")
```

```python
%%time
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 17 ms, sys: 9.76 ms, total: 26.7 ms
Wall time: 825 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>

```python
%%time
# The second time it is already in the cache, so it goes faster
llm.predict("Tell me a joke")
```

<CodeOutputBlock lang="python">

```
CPU times: user 2.46 ms, sys: 1.23 ms, total: 3.7 ms
Wall time: 2.67 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```

</CodeOutputBlock>
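`SQLiteCache` is a convenience wrapper over LangChain's SQLAlchemy-backed cache; if you need a different database, you can hand `SQLAlchemyCache` an engine yourself, as the unit tests in this PR do with an in-memory SQLite engine. A minimal sketch (the connection URL is just an example):

```python
from sqlalchemy import create_engine

from langchain.cache import SQLAlchemyCache

# Any SQLAlchemy-supported database works; "sqlite://" is an in-memory example
langchain.llm_cache = SQLAlchemyCache(engine=create_engine("sqlite://"))
```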
@@ -0,0 +1,33 @@
"""Fake ChatModel for testing purposes."""
from typing import Any, List, Mapping, Optional

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.chat_models.base import SimpleChatModel
from langchain.schema import BaseMessage


class FakeListChatModel(SimpleChatModel):
    """Fake ChatModel for testing purposes."""

    responses: List
    i: int = 0

    @property
    def _llm_type(self) -> str:
        return "fake-list-chat-model"

    def _call(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Return the next canned response and advance the index."""
        response = self.responses[self.i]
        self.i += 1
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"responses": self.responses}
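For illustration, this is roughly how the new test module below uses `FakeListChatModel`: it returns its canned responses in order, so tests stay deterministic and never call a real provider (the example messages and responses here are made up):

```python
from langchain.chat_models import FakeListChatModel
from langchain.schema import HumanMessage

fake = FakeListChatModel(responses=["first reply", "second reply"])

# Each call returns the next canned response as an AIMessage
print(fake([HumanMessage(content="Hi")]))  # content="first reply"
print(fake([HumanMessage(content="Hi")]))  # content="second reply"
```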
@@ -0,0 +1,146 @@
"""Test caching for LLMs and ChatModels."""
from typing import Dict, Generator, List, Union

import pytest
from _pytest.fixtures import FixtureRequest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

import langchain
from langchain.cache import (
    InMemoryCache,
    SQLAlchemyCache,
)
from langchain.chat_models import FakeListChatModel
from langchain.chat_models.base import BaseChatModel, dumps
from langchain.llms import FakeListLLM
from langchain.llms.base import BaseLLM
from langchain.schema import (
    AIMessage,
    BaseMessage,
    ChatGeneration,
    Generation,
    HumanMessage,
)


def get_sqlite_cache() -> SQLAlchemyCache:
    return SQLAlchemyCache(engine=create_engine("sqlite://"))


CACHE_OPTIONS = [
    InMemoryCache,
    get_sqlite_cache,
]


@pytest.fixture(autouse=True, params=CACHE_OPTIONS)
def set_cache_and_teardown(request: FixtureRequest) -> Generator[None, None, None]:
    # Will be run before each test
    cache_instance = request.param
    langchain.llm_cache = cache_instance()
    if langchain.llm_cache:
        langchain.llm_cache.clear()
    else:
        raise ValueError("Cache not set. This should never happen.")

    yield

    # Will be run after each test
    if langchain.llm_cache:
        langchain.llm_cache.clear()
    else:
        raise ValueError("Cache not set. This should never happen.")


def test_llm_caching() -> None:
    prompt = "How are you?"
    response = "Test response"
    cached_response = "Cached test response"
    llm = FakeListLLM(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=prompt,
            llm_string=create_llm_string(llm),
            return_val=[Generation(text=cached_response)],
        )
        assert llm(prompt) == cached_response
    else:
        raise ValueError(
            "The cache is not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def test_old_sqlite_llm_caching() -> None:
    if isinstance(langchain.llm_cache, SQLAlchemyCache):
        prompt = "How are you?"
        response = "Test response"
        cached_response = "Cached test response"
        llm = FakeListLLM(responses=[response])
        items = [
            langchain.llm_cache.cache_schema(
                prompt=prompt,
                llm=create_llm_string(llm),
                response=cached_response,
                idx=0,
            )
        ]
        with Session(langchain.llm_cache.engine) as session, session.begin():
            for item in items:
                session.merge(item)
        assert llm(prompt) == cached_response


def test_chat_model_caching() -> None:
    prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
    response = "Test response"
    cached_response = "Cached test response"
    cached_message = AIMessage(content=cached_response)
    llm = FakeListChatModel(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=dumps(prompt),
            llm_string=llm._get_llm_string(),
            return_val=[ChatGeneration(message=cached_message)],
        )
        result = llm(prompt)
        assert isinstance(result, AIMessage)
        assert result.content == cached_response
    else:
        raise ValueError(
            "The cache is not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def test_chat_model_caching_params() -> None:
    prompt: List[BaseMessage] = [HumanMessage(content="How are you?")]
    response = "Test response"
    cached_response = "Cached test response"
    cached_message = AIMessage(content=cached_response)
    llm = FakeListChatModel(responses=[response])
    if langchain.llm_cache:
        langchain.llm_cache.update(
            prompt=dumps(prompt),
            llm_string=llm._get_llm_string(functions=[]),
            return_val=[ChatGeneration(message=cached_message)],
        )
        result = llm(prompt, functions=[])
        assert isinstance(result, AIMessage)
        assert result.content == cached_response
        result_no_params = llm(prompt)
        assert isinstance(result_no_params, AIMessage)
        assert result_no_params.content == response
    else:
        raise ValueError(
            "The cache is not set. This should never happen, as the pytest fixture "
            "`set_cache_and_teardown` always sets the cache."
        )


def create_llm_string(llm: Union[BaseLLM, BaseChatModel]) -> str:
    _dict: Dict = llm.dict()
    _dict["stop"] = None
    return str(sorted([(k, v) for k, v in _dict.items()]))