langchain/libs/community/tests/integration_tests/cache/test_cassandra.py
Eugene Yurtsev f92006de3c
multiple: langchain 0.2 in master (#21191)
0.2rc 

migrations

- [x] Move memory
- [x] Move remaining retrievers
- [x] graph_qa chains
- [x] some dependency from evaluation code potentially on math utils
- [x] Move openapi chain from `langchain.chains.api.openapi` to
`langchain_community.chains.openapi`
- [x] Migrate `langchain.chains.ernie_functions` to
`langchain_community.chains.ernie_functions`
- [x] migrate `langchain/chains/llm_requests.py` to
`langchain_community.chains.llm_requests`
- [x] Moving `langchain_community.cross_enoders.base:BaseCrossEncoder`
->
`langchain_community.retrievers.document_compressors.cross_encoder:BaseCrossEncoder`
(namespace not ideal, but it needs to be moved to `langchain` to avoid
circular deps)
- [x] unit tests langchain -- add pytest.mark.community to some unit
tests that will stay in langchain
- [x] unit tests community -- move unit tests that depend on community
to community
- [x] mv integration tests that depend on community to community
- [x] mypy checks

Other todo

- [x] Make deprecation warnings not noisy (need to use warn deprecated
and check that things are implemented properly)
- [x] Update deprecation messages with timeline for code removal (likely
we actually won't be removing things until 0.4 release) -- will give
people more time to transition their code.
- [ ] Add information to deprecation warning to show users how to
migrate their code base using langchain-cli
- [ ] Remove any unnecessary requirements in langchain (e.g., is
SQLALchemy required?)

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2024-05-08 16:46:52 -04:00

178 lines
5.9 KiB
Python

"""Test Cassandra caches. Requires a running vector-capable Cassandra cluster."""
import asyncio
import os
import time
from typing import Any, Iterator, Tuple
import pytest
from langchain.globals import get_llm_cache, set_llm_cache
from langchain_core.outputs import Generation, LLMResult
from langchain_community.cache import CassandraCache, CassandraSemanticCache
from langchain_community.utilities.cassandra import SetupMode
from tests.integration_tests.cache.fake_embeddings import FakeEmbeddings
from tests.unit_tests.llms.fake_llm import FakeLLM
@pytest.fixture(scope="module")
def cassandra_connection() -> Iterator[Tuple[Any, str]]:
from cassandra.cluster import Cluster
keyspace = "langchain_cache_test_keyspace"
# get db connection
if "CASSANDRA_CONTACT_POINTS" in os.environ:
contact_points = os.environ["CONTACT_POINTS"].split(",")
cluster = Cluster(contact_points)
else:
cluster = Cluster()
#
session = cluster.connect()
# ensure keyspace exists
session.execute(
(
f"CREATE KEYSPACE IF NOT EXISTS {keyspace} "
f"WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': 1}}"
)
)
yield (session, keyspace)
def test_cassandra_cache(cassandra_connection: Tuple[Any, str]) -> None:
session, keyspace = cassandra_connection
cache = CassandraCache(session=session, keyspace=keyspace)
set_llm_cache(cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
output = llm.generate(["foo"])
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
cache.clear()
async def test_cassandra_cache_async(cassandra_connection: Tuple[Any, str]) -> None:
session, keyspace = cassandra_connection
cache = CassandraCache(
session=session, keyspace=keyspace, setup_mode=SetupMode.ASYNC
)
set_llm_cache(cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
await get_llm_cache().aupdate("foo", llm_string, [Generation(text="fizz")])
output = await llm.agenerate(["foo"])
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
await cache.aclear()
def test_cassandra_cache_ttl(cassandra_connection: Tuple[Any, str]) -> None:
session, keyspace = cassandra_connection
cache = CassandraCache(session=session, keyspace=keyspace, ttl_seconds=2)
set_llm_cache(cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
output = llm.generate(["foo"])
assert output == expected_output
time.sleep(2.5)
# entry has expired away.
output = llm.generate(["foo"])
assert output != expected_output
cache.clear()
async def test_cassandra_cache_ttl_async(cassandra_connection: Tuple[Any, str]) -> None:
session, keyspace = cassandra_connection
cache = CassandraCache(
session=session, keyspace=keyspace, ttl_seconds=2, setup_mode=SetupMode.ASYNC
)
set_llm_cache(cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
await get_llm_cache().aupdate("foo", llm_string, [Generation(text="fizz")])
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
output = await llm.agenerate(["foo"])
assert output == expected_output
await asyncio.sleep(2.5)
# entry has expired away.
output = await llm.agenerate(["foo"])
assert output != expected_output
await cache.aclear()
def test_cassandra_semantic_cache(cassandra_connection: Tuple[Any, str]) -> None:
session, keyspace = cassandra_connection
sem_cache = CassandraSemanticCache(
session=session,
keyspace=keyspace,
embedding=FakeEmbeddings(),
)
set_llm_cache(sem_cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
output = llm.generate(["bar"]) # same embedding as 'foo'
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
# clear the cache
sem_cache.clear()
output = llm.generate(["bar"]) # 'fizz' is erased away now
assert output != expected_output
sem_cache.clear()
async def test_cassandra_semantic_cache_async(
cassandra_connection: Tuple[Any, str],
) -> None:
session, keyspace = cassandra_connection
sem_cache = CassandraSemanticCache(
session=session,
keyspace=keyspace,
embedding=FakeEmbeddings(),
setup_mode=SetupMode.ASYNC,
)
set_llm_cache(sem_cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
await get_llm_cache().aupdate("foo", llm_string, [Generation(text="fizz")])
output = await llm.agenerate(["bar"]) # same embedding as 'foo'
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
# clear the cache
await sem_cache.aclear()
output = await llm.agenerate(["bar"]) # 'fizz' is erased away now
assert output != expected_output
await sem_cache.aclear()