community: add SingleStoreDB semantic cache (#23218)
This PR adds a `SingleStoreDBSemanticCache` class that implements a cache based on the SingleStoreDB vector store, along with integration tests and a notebook example.

Additionally, this PR contains minor changes to the SingleStoreDB vector store:
- change the add texts/documents methods to return a list of inserted ids
- implement a delete(ids) method to delete documents by a list of ids
- add a drop() method to drop the corresponding database table
- update the integration tests to use and check the functionality implemented above

CC: @baskaryan, @hwchase17

---------

Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com>
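For illustration, a minimal sketch of the new vector-store surface (assuming a reachable SingleStoreDB instance; the connection URL and table name below are placeholders):

```python
from langchain_community.vectorstores.singlestoredb import SingleStoreDB
from langchain_openai import OpenAIEmbeddings

# Placeholder connection URL and table name.
docsearch = SingleStoreDB(
    OpenAIEmbeddings(),
    table_name="example_table",
    host="root:pass@localhost:3306/db",
)

# add_texts/add_documents now return the inserted row ids
# when called with return_ids=True.
ids = docsearch.add_texts(["foo", "bar"], return_ids=True)

# delete(ids) removes the documents with the given ids.
docsearch.delete(ids)

# drop() removes the backing table; the store is unusable afterwards.
docsearch.drop()
```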
This commit is contained in:
parent bb597b1286, commit a4eb6d0fb1
@ -60,7 +60,7 @@
" * document addition by id (`add_documents` method with `ids` argument)\n",
" * delete by id (`delete` method with `ids` argument)\n",
"\n",
"Compatible Vectorstores: `Aerospike`, `AnalyticDB`, `AstraDB`, `AwaDB`, `AzureCosmosDBNoSqlVectorSearch`, `AzureCosmosDBVectorSearch`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `Yellowbrick`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`.\n",
"Compatible Vectorstores: `Aerospike`, `AnalyticDB`, `AstraDB`, `AwaDB`, `AzureCosmosDBNoSqlVectorSearch`, `AzureCosmosDBVectorSearch`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SingleStoreDB`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `Yellowbrick`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`.\n",
" \n",
"## Caution\n",
"\n",
@ -2147,6 +2147,32 @@
"llm(\"Tell me one joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SingleStoreDB Semantic Cache\n",
|
||||
"You can use [SingleStoreDB](https://python.langchain.com/docs/integrations/vectorstores/singlestoredb/) as a semantic cache to cache prompts and responses."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d82f1bdc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.cache import SingleStoreDBSemanticCache\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"set_llm_cache(\n",
|
||||
" SingleStoreDBSemanticCache(\n",
|
||||
" embedding=OpenAIEmbeddings(),\n",
|
||||
" host=\"root:pass@localhost:3306/db\",\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
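{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once the cache is set, semantically similar prompts can be served from SingleStoreDB instead of calling the LLM again (a sketch; actual behavior depends on your model and the cache contents):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The first call misses the cache; a semantically similar prompt\n",
"# can then be answered from SingleStoreDB.\n",
"llm(\"Tell me a joke\")\n",
"llm(\"Tell me one joke\")"
]
},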
{
"cell_type": "markdown",
"id": "ae1f5e1c-085e-4998-9f2d-b5867d2c3d5b",
@ -2178,7 +2204,7 @@
"source": [
"**Cache** classes are implemented by inheriting the [BaseCache](https://api.python.langchain.com/en/latest/caches/langchain_core.caches.BaseCache.html) class.\n",
"\n",
"This table lists all 20 derived classes with links to the API Reference.\n",
"This table lists all 21 derived classes with links to the API Reference.\n",
"\n",
"\n",
"| Namespace 🔻 | Class |\n",
@ -2195,6 +2221,7 @@
"| langchain_community.cache | [MomentoCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.MomentoCache.html) |\n",
"| langchain_community.cache | [OpenSearchSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.OpenSearchSemanticCache.html) |\n",
"| langchain_community.cache | [RedisSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.RedisSemanticCache.html) |\n",
"| langchain_community.cache | [SingleStoreDBSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SingleStoreDBSemanticCache.html) |\n",
"| langchain_community.cache | [SQLAlchemyCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyCache.html) |\n",
"| langchain_community.cache | [SQLAlchemyMd5Cache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyMd5Cache.html) |\n",
"| langchain_community.cache | [UpstashRedisCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.UpstashRedisCache.html) |\n",
@ -58,6 +58,7 @@ from langchain_community.vectorstores.azure_cosmos_db import (
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)
from langchain_community.vectorstores.utils import DistanceStrategy

try:
    from sqlalchemy.orm import declarative_base
@ -84,6 +85,7 @@ from langchain_community.vectorstores import (
    OpenSearchVectorSearch as OpenSearchVectorStore,
)
from langchain_community.vectorstores.redis import Redis as RedisVectorstore
from langchain_community.vectorstores.singlestoredb import SingleStoreDB

logger = logging.getLogger(__file__)
@ -2373,3 +2375,221 @@ class OpenSearchSemanticCache(BaseCache):
        if index_name in self._cache_dict:
            self._cache_dict[index_name].delete_index(index_name=index_name)
            del self._cache_dict[index_name]


class SingleStoreDBSemanticCache(BaseCache):
    """Cache that uses SingleStore DB as a backend."""

    def __init__(
        self,
        embedding: Embeddings,
        *,
        cache_table_prefix: str = "cache_",
        search_threshold: float = 0.2,
        **kwargs: Any,
    ):
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.
            cache_table_prefix (str, optional): Prefix for the cache table name.
                Defaults to "cache_".
            search_threshold (float, optional): The minimum similarity score for
                a search result to be considered a match. Defaults to 0.2.

        Following arguments pertain to the SingleStoreDB vector store:

            distance_strategy (DistanceStrategy, optional):
                Determines the strategy employed for calculating
                the distance between vectors in the embedding space.
                Defaults to DOT_PRODUCT.
                Available options are:
                - DOT_PRODUCT: Computes the scalar product of two vectors.
                    This is the default behavior.
                - EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
                    two vectors. This metric considers the geometric distance in
                    the vector space, and might be more suitable for embeddings
                    that rely on spatial relationships. This metric is not
                    compatible with the WEIGHTED_SUM search strategy.

            content_field (str, optional): Specifies the field to store the content.
                Defaults to "content".
            metadata_field (str, optional): Specifies the field to store metadata.
                Defaults to "metadata".
            vector_field (str, optional): Specifies the field to store the vector.
                Defaults to "vector".
            id_field (str, optional): Specifies the field to store the id.
                Defaults to "id".

            use_vector_index (bool, optional): Toggles the use of a vector index.
                Works only with SingleStoreDB 8.5 or later. Defaults to False.
                If set to True, the vector_size parameter is required to be set to
                a proper value.

            vector_index_name (str, optional): Specifies the name of the vector index.
                Defaults to empty. Will be ignored if use_vector_index is set to False.

            vector_index_options (dict, optional): Specifies the options for
                the vector index. Defaults to {}.
                Will be ignored if use_vector_index is set to False. The options are:
                index_type (str, optional): Specifies the type of the index.
                    Defaults to IVF_PQFS.
                For more options, please refer to the SingleStoreDB documentation:
                https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/

            vector_size (int, optional): Specifies the size of the vector.
                Defaults to 1536. Required if use_vector_index is set to True.
                Should be set to the same value as the size of the vectors
                stored in the vector_field.

        Following arguments pertain to the connection pool:

            pool_size (int, optional): Determines the number of active connections in
                the pool. Defaults to 5.
            max_overflow (int, optional): Determines the maximum number of connections
                allowed beyond the pool_size. Defaults to 10.
            timeout (float, optional): Specifies the maximum wait time in seconds for
                establishing a connection. Defaults to 30.

        Following arguments pertain to the database connection:

            host (str, optional): Specifies the hostname, IP address, or URL for the
                database connection. The default scheme is "mysql".
            user (str, optional): Database username.
            password (str, optional): Database password.
            port (int, optional): Database port. Defaults to 3306 for non-HTTP
                connections, 80 for HTTP connections, and 443 for HTTPS connections.
            database (str, optional): Database name.

        Additional optional arguments provide further customization over the
        database connection:

            pure_python (bool, optional): Toggles the connector mode. If True,
                operates in pure Python mode.
            local_infile (bool, optional): Allows local file uploads.
            charset (str, optional): Specifies the character set for string values.
            ssl_key (str, optional): Specifies the path of the file containing the SSL
                key.
            ssl_cert (str, optional): Specifies the path of the file containing the SSL
                certificate.
            ssl_ca (str, optional): Specifies the path of the file containing the SSL
                certificate authority.
            ssl_cipher (str, optional): Sets the SSL cipher list.
            ssl_disabled (bool, optional): Disables SSL usage.
            ssl_verify_cert (bool, optional): Verifies the server's certificate.
                Automatically enabled if ``ssl_ca`` is specified.
            ssl_verify_identity (bool, optional): Verifies the server's identity.
            conv (dict[int, Callable], optional): A dictionary of data conversion
                functions.
            credential_type (str, optional): Specifies the type of authentication to
                use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
            autocommit (bool, optional): Enables autocommits.
            results_type (str, optional): Determines the structure of the query results:
                tuples, namedtuples, dicts.
            results_format (str, optional): Deprecated. This option has been renamed to
                results_type.

        Examples:
            Basic Usage:

            .. code-block:: python

                import langchain
                from langchain.cache import SingleStoreDBSemanticCache
                from langchain.embeddings import OpenAIEmbeddings

                langchain.llm_cache = SingleStoreDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    host="https://user:password@127.0.0.1:3306/database"
                )

            Advanced Usage:

            .. code-block:: python

                import langchain
                from langchain.cache import SingleStoreDBSemanticCache
                from langchain.embeddings import OpenAIEmbeddings

                langchain.llm_cache = SingleStoreDBSemanticCache(
                    embedding=OpenAIEmbeddings(),
                    use_vector_index=True,
                    host="127.0.0.1",
                    port=3306,
                    user="user",
                    password="password",
                    database="db",
                    cache_table_prefix="my_cache_",
                    pool_size=10,
                    timeout=60,
                )
        """

        self._cache_dict: Dict[str, SingleStoreDB] = {}
        self.embedding = embedding
        self.cache_table_prefix = cache_table_prefix
        self.search_threshold = search_threshold

        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

    def _index_name(self, llm_string: str) -> str:
        hashed_index = _hash(llm_string)
        return f"{self.cache_table_prefix}{hashed_index}"

    def _get_llm_cache(self, llm_string: str) -> SingleStoreDB:
        index_name = self._index_name(llm_string)

        # return vectorstore client for the specific llm string
        if index_name not in self._cache_dict:
            self._cache_dict[index_name] = SingleStoreDB(
                embedding=self.embedding,
                table_name=index_name,
                **self.connection_kwargs,
            )
        return self._cache_dict[index_name]

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string."""
        llm_cache = self._get_llm_cache(llm_string)
        generations: List = []
        # Search for similar prompts in the cache table.
        results = llm_cache.similarity_search_with_score(
            query=prompt,
            k=1,
        )
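        # With DOT_PRODUCT, a higher score means a closer match, so a hit must
        # score above search_threshold; with EUCLIDEAN_DISTANCE, a lower
        # distance means a closer match, so a hit must fall below it.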
        if results:
            for document_score in results:
                if (
                    document_score[1] > self.search_threshold
                    and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT
                ) or (
                    document_score[1] < self.search_threshold
                    and llm_cache.distance_strategy
                    == DistanceStrategy.EUCLIDEAN_DISTANCE
                ):
                    generations.extend(loads(document_score[0].metadata["return_val"]))
        return generations if generations else None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        for gen in return_val:
            if not isinstance(gen, Generation):
                raise ValueError(
                    "SingleStoreDBSemanticCache only supports caching of "
                    f"normal LLM generations, got {type(gen)}"
                )
        llm_cache = self._get_llm_cache(llm_string)
        metadata = {
            "llm_string": llm_string,
            "prompt": prompt,
            "return_val": dumps([g for g in return_val]),
        }
        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])

    def clear(self, **kwargs: Any) -> None:
        """Clear semantic cache for a given llm_string."""
        index_name = self._index_name(kwargs["llm_string"])
        if index_name in self._cache_dict:
            self._cache_dict[index_name].drop()
            del self._cache_dict[index_name]

@ -153,7 +153,7 @@ class SingleStoreDBChatMessageHistory(BaseChatMessageHistory):
        self.connection_kwargs["conn_attrs"] = dict()

        self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
        self.connection_kwargs["conn_attrs"]["_connector_version"] = "1.0.1"
        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.1.0"

        # Create a connection pool.
        try:
@ -276,7 +276,7 @@ class SingleStoreDB(VectorStore):
        self.connection_kwargs["conn_attrs"] = dict()

        self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.0.0"
        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.1.0"

        # Create connection pool.
        self.connection_pool = QueuePool(
@ -354,6 +354,7 @@ class SingleStoreDB(VectorStore):
        uris: List[str],
        metadatas: Optional[List[dict]] = None,
        embeddings: Optional[List[List[float]]] = None,
        return_ids: bool = False,
        **kwargs: Any,
    ) -> List[str]:
        """Run images through the embeddings and add to the vectorstore.
@ -367,7 +368,8 @@ class SingleStoreDB(VectorStore):
                embeddings. Defaults to None.

        Returns:
            List[str]: empty list
            List[str]: list of document ids added to the vectorstore
                if return_ids is True. Otherwise, an empty list.
        """
        # Set embeddings
        if (
@ -376,13 +378,16 @@ class SingleStoreDB(VectorStore):
            and hasattr(self.embedding, "embed_image")
        ):
            embeddings = self.embedding.embed_image(uris=uris)
        return self.add_texts(uris, metadatas, embeddings, **kwargs)
        return self.add_texts(
            uris, metadatas, embeddings, return_ids=return_ids, **kwargs
        )

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        embeddings: Optional[List[List[float]]] = None,
        return_ids: bool = False,
        **kwargs: Any,
    ) -> List[str]:
        """Add more texts to the vectorstore.
@ -395,8 +400,10 @@ class SingleStoreDB(VectorStore):
                embeddings. Defaults to None.

        Returns:
            List[str]: empty list
            List[str]: list of document ids added to the vectorstore
                if return_ids is True. Otherwise, an empty list.
        """
        ids: List[str] = []
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
@ -424,13 +431,48 @@ class SingleStoreDB(VectorStore):
                            json.dumps(metadata),
                        ),
                    )
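                    # LAST_INSERT_ID() returns the AUTO_INCREMENT id generated
                    # by the INSERT just issued on this connection.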
                    if return_ids:
                        cur.execute("SELECT LAST_INSERT_ID();")
                        row = cur.fetchone()
                        if row:
                            ids.append(str(row[0]))
                if self.use_vector_index or self.use_full_text_search:
                    cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
            finally:
                cur.close()
        finally:
            conn.close()
        return []
        return ids

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool | None:
        """Delete documents from the vectorstore.

        Args:
            ids (List[str], optional): List of document ids to delete.
                If None, the call is a no-op and True is returned.
                Defaults to None.

        Returns:
            bool: True if deletion was successful, False otherwise.
        """
        if ids is None:
            return True

        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
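                # ids are interpolated directly into the DELETE statement; they
                # are the numeric row ids produced by add_texts/add_documents.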
                cur.execute(
                    "DELETE FROM {} WHERE {} IN ({})".format(
                        self.table_name, self.id_field, ",".join(ids)
                    )
                )
                if self.use_vector_index or self.use_full_text_search:
                    cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
            finally:
                cur.close()
        finally:
            conn.close()
        return True

    def similarity_search(
        self,
@ -995,6 +1037,20 @@ class SingleStoreDB(VectorStore):
        instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
        return instance

    def drop(self) -> None:
        """Drop the table and delete all data from the vectorstore.

        Vector store will be unusable after this operation.
        """
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
                cur.execute("DROP TABLE IF EXISTS {}".format(self.table_name))
            finally:
                cur.close()
        finally:
            conn.close()


# SingleStoreDBRetriever is not needed, but we keep it for backwards compatibility
SingleStoreDBRetriever = VectorStoreRetriever
libs/community/tests/integration_tests/cache/test_singlestoredb_cache.py (new file, 43 lines)
@ -0,0 +1,43 @@
"""Test SingleStoreDB semantic cache. Requires a SingleStore DB database.
|
||||
|
||||
Required to run this test:
|
||||
- a recent `singlestoredb` Python package available
|
||||
- a SingleStore DB instance;
|
||||
"""
|
||||
|
||||
from importlib.util import find_spec
|
||||
|
||||
import pytest
|
||||
from langchain_core.globals import get_llm_cache, set_llm_cache
|
||||
from langchain_core.outputs import Generation
|
||||
|
||||
from langchain_community.cache import SingleStoreDBSemanticCache
|
||||
from tests.integration_tests.cache.fake_embeddings import FakeEmbeddings
|
||||
from tests.unit_tests.llms.fake_llm import FakeLLM
|
||||
|
||||
TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db"
|
||||
|
||||
singlestoredb_installed = find_spec("singlestoredb") is not None
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_tinglestoredb_semantic_cache() -> None:
|
||||
"""Test opensearch semantic cache functionality."""
|
||||
set_llm_cache(
|
||||
SingleStoreDBSemanticCache(
|
||||
embedding=FakeEmbeddings(),
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
search_threshold=0.0,
|
||||
)
|
||||
)
|
||||
llm = FakeLLM()
|
||||
params = llm.dict()
|
||||
params["stop"] = None
|
||||
llm_string = str(sorted([(k, v) for k, v in params.items()]))
|
||||
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
|
||||
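    # search_threshold=0.0 with the default DOT_PRODUCT strategy means any
    # positive-score match is a hit, so the lookup for "bar" returns the
    # entry cached for "foo".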
    cache_output = get_llm_cache().lookup("bar", llm_string)
    assert cache_output == [Generation(text="fizz")]

    get_llm_cache().clear(llm_string=llm_string)
    output = get_llm_cache().lookup("bar", llm_string)
    assert output != [Generation(text="fizz")]
@ -152,7 +152,7 @@ def test_singlestoredb(texts: List[str]) -> None:
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -169,7 +169,7 @@ def test_singlestoredb_new_vector(texts: List[str]) -> None:
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -187,7 +187,7 @@ def test_singlestoredb_euclidean_distance(texts: List[str]) -> None:
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -207,7 +207,7 @@ def test_singlestoredb_vector_index_1(texts: List[str]) -> None:
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -227,7 +227,7 @@ def test_singlestoredb_vector_index_2(texts: List[str]) -> None:
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=1)
    assert output[0].page_content == "foo"
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -247,7 +247,7 @@ def test_singlestoredb_vector_index_large() -> None:
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output[0].page_content == "foo"
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -269,7 +269,7 @@ def test_singlestoredb_from_existing(texts: List[str]) -> None:
    )
    output = docsearch2.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_RESULT
    drop(table_name)
    docsearch2.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -286,7 +286,7 @@ def test_singlestoredb_from_documents(texts: List[str]) -> None:
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_WITH_METADATA_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -309,7 +309,7 @@ def test_singlestoredb_add_texts_to_existing(texts: List[str]) -> None:
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -329,7 +329,7 @@ def test_singlestoredb_filter_metadata(texts: List[str]) -> None:
    )
    output = docsearch.similarity_search("foo", k=1, filter={"index": 2})
    assert output == [Document(page_content="baz", metadata={"index": 2})]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -352,7 +352,7 @@ def test_singlestoredb_filter_metadata_2(texts: List[str]) -> None:
    assert output == [
        Document(page_content="foo", metadata={"index": 0, "category": "budget"})
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -377,7 +377,7 @@ def test_singlestoredb_filter_metadata_3(texts: List[str]) -> None:
    assert output == [
        Document(page_content="bar", metadata={"index": 1, "category": "budget"})
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -398,7 +398,7 @@ def test_singlestoredb_filter_metadata_4(texts: List[str]) -> None:
    )
    output = docsearch.similarity_search("foo", k=1, filter={"category": "vacation"})
    assert output == []
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -437,7 +437,7 @@ def test_singlestoredb_filter_metadata_5(texts: List[str]) -> None:
            },
        )
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -468,7 +468,7 @@ def test_singlestoredb_filter_metadata_6(texts: List[str]) -> None:
            metadata={"index": 1, "category": "budget", "is_good": True},
        )
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -499,7 +499,7 @@ def test_singlestoredb_filter_metadata_7(texts: List[str]) -> None:
            metadata={"index": 2, "category": "budget", "score": 2.5},
        )
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -523,7 +523,7 @@ def test_singlestoredb_as_retriever(texts: List[str]) -> None:
            page_content="bar",
        ),
    ]
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -546,7 +546,7 @@ def test_singlestoredb_add_image(texts: List[str]) -> None:
    docsearch.add_images(temp_files)
    output = docsearch.similarity_search("foo", k=1)
    assert output[0].page_content in temp_files
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -571,7 +571,7 @@ def test_singestoredb_add_image2() -> None:
    docsearch.add_images(image_uris)
    output = docsearch.similarity_search("horse", k=1)
    assert "horse" in output[0].page_content
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -610,7 +610,7 @@ def test_singlestoredb_text_only_search(snow_rain_docs: List[Document]) -> None:
        "Blanketing the countryside in a soft, pristine layer,"
        in output[0].page_content
    )
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -667,7 +667,7 @@ def test_singlestoredb_filter_by_vector_search1(snow_rain_docs: List[Document])
        "High in the mountains, the rain transformed into a delicate"
        in output[0].page_content
    )
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -697,7 +697,7 @@ def test_singlestoredb_filter_by_vector_search2(snow_rain_docs: List[Document])
        "Amidst the bustling cityscape, the rain fell relentlessly"
        in output[0].page_content
    )
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -725,7 +725,7 @@ def test_singlestoredb_weighted_sum_search_unsupported_strategy(
        )
    except ValueError as e:
        assert "Search strategy WEIGHTED_SUM is not" in str(e)
    drop(table_name)
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@ -752,4 +752,49 @@ def test_singlestoredb_weighted_sum_search(snow_rain_docs: List[Document]) -> No
    assert (
        "Atop the rugged peaks, snow fell with an unyielding" in output[0].page_content
    )
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_insert(snow_rain_docs: List[Document]) -> None:
    table_name = "test_singlestoredb_insert"
    drop(table_name)
    embeddings = IncrementalEmbeddings()
    docsearch = SingleStoreDB(
        embeddings,
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    ids = docsearch.add_documents(snow_rain_docs, return_ids=True)
    assert len(ids) == len(snow_rain_docs)
    for i, id1 in enumerate(ids):
        for j, id2 in enumerate(ids):
            if i != j:
                assert id1 != id2
    docsearch.drop()


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_delete(snow_rain_docs: List[Document]) -> None:
    table_name = "test_singlestoredb_delete"
    drop(table_name)
    embeddings = IncrementalEmbeddings()
    docsearch = SingleStoreDB(
        embeddings,
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    ids = docsearch.add_documents(snow_rain_docs, return_ids=True)
    output = docsearch.similarity_search(
        "rainstorm in parched desert",
        k=3,
        filter={"count": "1"},
    )
    assert len(output) == 2
    docsearch.delete(ids)
    output = docsearch.similarity_search(
        "rainstorm in parched desert",
        k=3,
    )
    assert len(output) == 0
    docsearch.drop()
@ -84,6 +84,7 @@ def test_compatible_vectorstore_documentation() -> None:
        "Rockset",
        "ScaNN",
        "SemaDB",
        "SingleStoreDB",
        "SupabaseVectorStore",
        "SurrealDBStore",
        "TileDB",