From a4eb6d0fb1f895779ca0a8fa83738afcd98db2c3 Mon Sep 17 00:00:00 2001
From: volodymyr-memsql <57520563+volodymyr-memsql@users.noreply.github.com>
Date: Fri, 5 Jul 2024 16:26:06 +0300
Subject: [PATCH] community: add SingleStoreDB semantic cache (#23218)

This PR adds a `SingleStoreDBSemanticCache` class that implements a cache
based on the SingleStoreDB vector store, integration tests, and a notebook
example.

Additionally, this PR contains minor changes to the SingleStoreDB vector store:
- changed the add texts/documents methods to return a list of inserted ids
- implemented a delete(ids) method to delete documents by a list of ids
- added a drop() method that drops the corresponding database table
- updated integration tests to use and check the functionality implemented above

CC: @baskaryan, @hwchase17

---------

Co-authored-by: Volodymyr Tkachuk
---
 docs/docs/how_to/indexing.ipynb | 2 +-
 docs/docs/integrations/llm_caching.ipynb | 29 ++-
 libs/community/langchain_community/cache.py | 220 ++++++++++++++++++
 .../chat_message_histories/singlestoredb.py | 2 +-
 .../vectorstores/singlestoredb.py | 66 +++++-
 .../cache/test_singlestoredb_cache.py | 43 ++++
 .../vectorstores/test_singlestoredb.py | 91 ++++++--
 .../vectorstores/test_indexing_docs.py | 1 +
 8 files changed, 423 insertions(+), 31 deletions(-)
 create mode 100644 libs/community/tests/integration_tests/cache/test_singlestoredb_cache.py

diff --git a/docs/docs/how_to/indexing.ipynb b/docs/docs/how_to/indexing.ipynb
index dfff2945a9..519640880b 100644
--- a/docs/docs/how_to/indexing.ipynb
+++ b/docs/docs/how_to/indexing.ipynb
@@ -60,7 +60,7 @@
     " * document addition by id (`add_documents` method with `ids` argument)\n",
     " * delete by id (`delete` method with `ids` argument)\n",
     "\n",
-    "Compatible Vectorstores: `Aerospike`, `AnalyticDB`, `AstraDB`, `AwaDB`, `AzureCosmosDBNoSqlVectorSearch`, `AzureCosmosDBVectorSearch`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `Yellowbrick`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`.\n",
+    "Compatible Vectorstores: `Aerospike`, `AnalyticDB`, `AstraDB`, `AwaDB`, `AzureCosmosDBNoSqlVectorSearch`, `AzureCosmosDBVectorSearch`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SingleStoreDB`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `Yellowbrick`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`.\n",
     " \n",
     "## Caution\n",
     "\n",
diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb
index dba454c231..7c47a28767 100644
--- a/docs/docs/integrations/llm_caching.ipynb
+++ b/docs/docs/integrations/llm_caching.ipynb
@@ -2147,6 +2147,32 @@
    "llm(\"Tell me one joke\")"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## SingleStoreDB Semantic Cache\n",
+   "You can use [SingleStoreDB](https://python.langchain.com/docs/integrations/vectorstores/singlestoredb/) as a semantic cache to cache prompts and responses."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "d82f1bdc",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "from langchain_community.cache import SingleStoreDBSemanticCache\n",
+   "from langchain_openai import OpenAIEmbeddings\n",
+   "\n",
+   "set_llm_cache(\n",
+   "    SingleStoreDBSemanticCache(\n",
+   "        embedding=OpenAIEmbeddings(),\n",
+   "        host=\"root:pass@localhost:3306/db\",\n",
+   "    )\n",
+   ")"
+  ]
+ },
 {
  "cell_type": "markdown",
  "id": "ae1f5e1c-085e-4998-9f2d-b5867d2c3d5b",
@@ -2178,7 +2204,7 @@
   "source": [
    "**Cache** classes are implemented by inheriting the [BaseCache](https://api.python.langchain.com/en/latest/caches/langchain_core.caches.BaseCache.html) class.\n",
    "\n",
-   "This table lists all 20 derived classes with links to the API Reference.\n",
+   "This table lists all 21 derived classes with links to the API Reference.\n",
    "\n",
    "\n",
    "| Namespace 🔻 | Class |\n",
@@ -2195,6 +2221,7 @@
    "| langchain_community.cache | [MomentoCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.MomentoCache.html) |\n",
    "| langchain_community.cache | [OpenSearchSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.OpenSearchSemanticCache.html) |\n",
    "| langchain_community.cache | [RedisSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.RedisSemanticCache.html) |\n",
+   "| langchain_community.cache | [SingleStoreDBSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SingleStoreDBSemanticCache.html) |\n",
    "| langchain_community.cache | [SQLAlchemyCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyCache.html) |\n",
    "| langchain_community.cache | [SQLAlchemyMd5Cache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyMd5Cache.html) |\n",
    "| langchain_community.cache | [UpstashRedisCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.UpstashRedisCache.html) |\n",
diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
index 88ab7b10e5..22dfa18096 100644
--- a/libs/community/langchain_community/cache.py
+++ b/libs/community/langchain_community/cache.py
@@ -58,6 +58,7 @@ from langchain_community.vectorstores.azure_cosmos_db import (
     CosmosDBSimilarityType,
     CosmosDBVectorSearchType,
 )
+from langchain_community.vectorstores.utils import DistanceStrategy

 try:
     from sqlalchemy.orm import declarative_base
@@ -84,6 +85,7 @@ from langchain_community.vectorstores import (
     OpenSearchVectorSearch as OpenSearchVectorStore,
 )
 from langchain_community.vectorstores.redis import Redis as RedisVectorstore
+from langchain_community.vectorstores.singlestoredb import SingleStoreDB

 logger = logging.getLogger(__file__)

@@ -2373,3 +2375,221 @@ class OpenSearchSemanticCache(BaseCache):
         if index_name in self._cache_dict:
             self._cache_dict[index_name].delete_index(index_name=index_name)
             del self._cache_dict[index_name]
+
+
+class SingleStoreDBSemanticCache(BaseCache):
+    """Cache that uses SingleStoreDB as a backend."""
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        *,
+        cache_table_prefix: str = "cache_",
+        search_threshold: float = 0.2,
+        **kwargs: Any,
+    ):
+        """Initialize with necessary components.
+
+        Args:
+            embedding (Embeddings): A text embedding model.
+            cache_table_prefix (str, optional): Prefix for the cache table name.
+                Defaults to "cache_".
+            search_threshold (float, optional): The minimum similarity score for
+                a search result to be considered a match. Defaults to 0.2.
+
+            Following arguments pertain to the SingleStoreDB vector store:
+
+            distance_strategy (DistanceStrategy, optional):
+                Determines the strategy employed for calculating
+                the distance between vectors in the embedding space.
+                Defaults to DOT_PRODUCT.
+                Available options are:
+                - DOT_PRODUCT: Computes the scalar product of two vectors.
+                    This is the default behavior.
+                - EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
+                    two vectors. This metric considers the geometric distance in
+                    the vector space, and might be more suitable for embeddings
+                    that rely on spatial relationships. This metric is not
+                    compatible with the WEIGHTED_SUM search strategy.
+
+            content_field (str, optional): Specifies the field to store the content.
+                Defaults to "content".
+            metadata_field (str, optional): Specifies the field to store metadata.
+                Defaults to "metadata".
+            vector_field (str, optional): Specifies the field to store the vector.
+                Defaults to "vector".
+            id_field (str, optional): Specifies the field to store the id.
+                Defaults to "id".
+
+            use_vector_index (bool, optional): Toggles the use of a vector index.
+                Works only with SingleStoreDB 8.5 or later. Defaults to False.
+                If set to True, the vector_size parameter is required to be set to
+                a proper value.
+
+            vector_index_name (str, optional): Specifies the name of the vector index.
+                Defaults to empty. Will be ignored if use_vector_index is set to False.
+
+            vector_index_options (dict, optional): Specifies the options for
+                the vector index. Defaults to {}.
+                Will be ignored if use_vector_index is set to False. The options are:
+                index_type (str, optional): Specifies the type of the index.
+                    Defaults to IVF_PQFS.
+                For more options, please refer to the SingleStoreDB documentation:
+                https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/
+
+            vector_size (int, optional): Specifies the size of the vector.
+                Defaults to 1536. Required if use_vector_index is set to True.
+                Should be set to the same value as the size of the vectors
+                stored in the vector_field.
+
+            Following arguments pertain to the connection pool:
+
+            pool_size (int, optional): Determines the number of active connections in
+                the pool. Defaults to 5.
+            max_overflow (int, optional): Determines the maximum number of connections
+                allowed beyond the pool_size. Defaults to 10.
+            timeout (float, optional): Specifies the maximum wait time in seconds for
+                establishing a connection. Defaults to 30.
+
+            Following arguments pertain to the database connection:
+
+            host (str, optional): Specifies the hostname, IP address, or URL for the
+                database connection. The default scheme is "mysql".
+            user (str, optional): Database username.
+            password (str, optional): Database password.
+            port (int, optional): Database port. Defaults to 3306 for non-HTTP
+                connections, 80 for HTTP connections, and 443 for HTTPS connections.
+            database (str, optional): Database name.
+
+            Additional optional arguments provide further customization over the
+            database connection:
+
+            pure_python (bool, optional): Toggles the connector mode. If True,
+                operates in pure Python mode.
+            local_infile (bool, optional): Allows local file uploads.
+            charset (str, optional): Specifies the character set for string values.
+            ssl_key (str, optional): Specifies the path of the file containing the SSL
+                key.
+            ssl_cert (str, optional): Specifies the path of the file containing the SSL
+                certificate.
+            ssl_ca (str, optional): Specifies the path of the file containing the SSL
+                certificate authority.
+            ssl_cipher (str, optional): Sets the SSL cipher list.
+            ssl_disabled (bool, optional): Disables SSL usage.
+            ssl_verify_cert (bool, optional): Verifies the server's certificate.
+                Automatically enabled if ``ssl_ca`` is specified.
+            ssl_verify_identity (bool, optional): Verifies the server's identity.
+            conv (dict[int, Callable], optional): A dictionary of data conversion
+                functions.
+            credential_type (str, optional): Specifies the type of authentication to
+                use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
+            autocommit (bool, optional): Enables autocommits.
+            results_type (str, optional): Determines the structure of the query results:
+                tuples, namedtuples, dicts.
+            results_format (str, optional): Deprecated. This option has been renamed to
+                results_type.
+
+        Examples:
+            Basic Usage:
+
+            .. code-block:: python
+
+                import langchain
+                from langchain.cache import SingleStoreDBSemanticCache
+                from langchain.embeddings import OpenAIEmbeddings
+
+                langchain.llm_cache = SingleStoreDBSemanticCache(
+                    embedding=OpenAIEmbeddings(),
+                    host="https://user:password@127.0.0.1:3306/database"
+                )
+
+            Advanced Usage:
+
+            .. code-block:: python
+
+                import langchain
+                from langchain.cache import SingleStoreDBSemanticCache
+                from langchain.embeddings import OpenAIEmbeddings
+
+                langchain.llm_cache = SingleStoreDBSemanticCache(
+                    embedding=OpenAIEmbeddings(),
+                    use_vector_index=True,
+                    host="127.0.0.1",
+                    port=3306,
+                    user="user",
+                    password="password",
+                    database="db",
+                    cache_table_prefix="my_custom_table_",
+                    pool_size=10,
+                    timeout=60,
+                )
+        """
+
+        self._cache_dict: Dict[str, SingleStoreDB] = {}
+        self.embedding = embedding
+        self.cache_table_prefix = cache_table_prefix
+        self.search_threshold = search_threshold
+
+        # Pass the rest of the kwargs to the connection.
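+        # They are forwarded verbatim to each SingleStoreDB vector store that
+        # _get_llm_cache creates below (one cache table per llm_string).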
+        self.connection_kwargs = kwargs
+
+    def _index_name(self, llm_string: str) -> str:
+        hashed_index = _hash(llm_string)
+        return f"{self.cache_table_prefix}{hashed_index}"
+
+    def _get_llm_cache(self, llm_string: str) -> SingleStoreDB:
+        index_name = self._index_name(llm_string)
+
+        # Return the vector store client for the specific llm string.
+        if index_name not in self._cache_dict:
+            self._cache_dict[index_name] = SingleStoreDB(
+                embedding=self.embedding,
+                table_name=index_name,
+                **self.connection_kwargs,
+            )
+        return self._cache_dict[index_name]
+
+    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt and llm_string."""
+        llm_cache = self._get_llm_cache(llm_string)
+        generations: List = []
+        # Search the cache table for entries semantically similar to the prompt.
+        results = llm_cache.similarity_search_with_score(
+            query=prompt,
+            k=1,
+        )
+        if results:
+            for document_score in results:
+                if (
+                    document_score[1] > self.search_threshold
+                    and llm_cache.distance_strategy == DistanceStrategy.DOT_PRODUCT
+                ) or (
+                    document_score[1] < self.search_threshold
+                    and llm_cache.distance_strategy
+                    == DistanceStrategy.EUCLIDEAN_DISTANCE
+                ):
+                    generations.extend(loads(document_score[0].metadata["return_val"]))
+        return generations if generations else None
+
+    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
+        """Update cache based on prompt and llm_string."""
+        for gen in return_val:
+            if not isinstance(gen, Generation):
+                raise ValueError(
+                    "SingleStoreDBSemanticCache only supports caching of "
+                    f"normal LLM generations, got {type(gen)}"
+                )
+        llm_cache = self._get_llm_cache(llm_string)
+        metadata = {
+            "llm_string": llm_string,
+            "prompt": prompt,
+            "return_val": dumps(list(return_val)),
+        }
+        llm_cache.add_texts(texts=[prompt], metadatas=[metadata])
+
+    def clear(self, **kwargs: Any) -> None:
+        """Clear semantic cache for a given llm_string."""
+        index_name = self._index_name(kwargs["llm_string"])
+        if index_name in self._cache_dict:
+            self._cache_dict[index_name].drop()
+            del self._cache_dict[index_name]
diff --git a/libs/community/langchain_community/chat_message_histories/singlestoredb.py b/libs/community/langchain_community/chat_message_histories/singlestoredb.py
index a2fc1af137..7dc1e5285c 100644
--- a/libs/community/langchain_community/chat_message_histories/singlestoredb.py
+++ b/libs/community/langchain_community/chat_message_histories/singlestoredb.py
@@ -153,7 +153,7 @@ class SingleStoreDBChatMessageHistory(BaseChatMessageHistory):
         self.connection_kwargs["conn_attrs"] = dict()

         self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
-        self.connection_kwargs["conn_attrs"]["_connector_version"] = "1.0.1"
+        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.1.0"

         # Create a connection pool.
         try:
diff --git a/libs/community/langchain_community/vectorstores/singlestoredb.py b/libs/community/langchain_community/vectorstores/singlestoredb.py
index 58f481feaf..6a812542cd 100644
--- a/libs/community/langchain_community/vectorstores/singlestoredb.py
+++ b/libs/community/langchain_community/vectorstores/singlestoredb.py
@@ -276,7 +276,7 @@ class SingleStoreDB(VectorStore):
         self.connection_kwargs["conn_attrs"] = dict()

         self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
-        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.0.0"
+        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.1.0"

         # Create connection pool.
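         # sqlalchemy's QueuePool keeps up to pool_size connections open and
         # allows up to max_overflow extra connections under load; connect()
         # waits at most `timeout` seconds for a free connection.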
         self.connection_pool = QueuePool(
@@ -354,6 +354,7 @@ class SingleStoreDB(VectorStore):
         uris: List[str],
         metadatas: Optional[List[dict]] = None,
         embeddings: Optional[List[List[float]]] = None,
+        return_ids: bool = False,
         **kwargs: Any,
     ) -> List[str]:
         """Run images through the embeddings and add to the vectorstore.
@@ -367,7 +368,8 @@
                 embeddings. Defaults to None.
+            return_ids (bool, optional): Whether to return the ids of the added
+                documents. Defaults to False.

         Returns:
-            List[str]: empty list
+            List[str]: list of document ids added to the vectorstore
+                if return_ids is True. Otherwise, an empty list.
         """
         # Set embeddings
         if (
@@ -376,13 +378,16 @@
             and hasattr(self.embedding, "embed_image")
         ):
             embeddings = self.embedding.embed_image(uris=uris)
-        return self.add_texts(uris, metadatas, embeddings, **kwargs)
+        return self.add_texts(
+            uris, metadatas, embeddings, return_ids=return_ids, **kwargs
+        )

     def add_texts(
         self,
         texts: Iterable[str],
         metadatas: Optional[List[dict]] = None,
         embeddings: Optional[List[List[float]]] = None,
+        return_ids: bool = False,
         **kwargs: Any,
     ) -> List[str]:
         """Add more texts to the vectorstore.
@@ -395,8 +400,10 @@
                 embeddings. Defaults to None.
+            return_ids (bool, optional): Whether to return the ids of the added
+                documents. Defaults to False.

         Returns:
-            List[str]: empty list
+            List[str]: list of document ids added to the vectorstore
+                if return_ids is True. Otherwise, an empty list.
         """
+        ids: List[str] = []
         conn = self.connection_pool.connect()
         try:
             cur = conn.cursor()
@@ -424,13 +431,48 @@
                         json.dumps(metadata),
                     ),
                 )
+                    if return_ids:
+                        cur.execute("SELECT LAST_INSERT_ID();")
+                        row = cur.fetchone()
+                        if row:
+                            ids.append(str(row[0]))
+                if self.use_vector_index or self.use_full_text_search:
+                    cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
+            finally:
+                cur.close()
+        finally:
+            conn.close()
+        return ids
+
+    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
+        """Delete documents from the vectorstore.
+
+        Args:
+            ids (List[str], optional): List of document ids to delete.
+                If None, no documents are deleted. Defaults to None.
+
+        Returns:
+            bool: True if deletion was successful, False otherwise.
+        """
+        if ids is None:
+            return True
+
+        conn = self.connection_pool.connect()
+        try:
+            cur = conn.cursor()
+            try:
+                # Bind the ids as query parameters instead of interpolating
+                # them into the SQL string.
+                cur.execute(
+                    "DELETE FROM {} WHERE {} IN ({})".format(
+                        self.table_name,
+                        self.id_field,
+                        ",".join(["%s"] * len(ids)),
+                    ),
+                    tuple(ids),
+                )
                 if self.use_vector_index or self.use_full_text_search:
                     cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
             finally:
                 cur.close()
         finally:
             conn.close()
-        return []
+        return True

     def similarity_search(
         self,
@@ -995,6 +1037,20 @@
         instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
         return instance

+    def drop(self) -> None:
+        """Drop the table and delete all data from the vectorstore.
+
+        The vector store will be unusable after this operation.
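+
+        A minimal usage sketch (assumes an existing ``SingleStoreDB``
+        instance named ``docsearch``):
+
+        .. code-block:: python
+
+            docsearch.drop()
+            # The backing table is gone; construct a new SingleStoreDB
+            # instance if the collection is needed again.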
+ """ + conn = self.connection_pool.connect() + try: + cur = conn.cursor() + try: + cur.execute("DROP TABLE IF EXISTS {}".format(self.table_name)) + finally: + cur.close() + finally: + conn.close() + # SingleStoreDBRetriever is not needed, but we keep it for backwards compatibility SingleStoreDBRetriever = VectorStoreRetriever diff --git a/libs/community/tests/integration_tests/cache/test_singlestoredb_cache.py b/libs/community/tests/integration_tests/cache/test_singlestoredb_cache.py new file mode 100644 index 0000000000..238dee4bac --- /dev/null +++ b/libs/community/tests/integration_tests/cache/test_singlestoredb_cache.py @@ -0,0 +1,43 @@ +"""Test SingleStoreDB semantic cache. Requires a SingleStore DB database. + +Required to run this test: + - a recent `singlestoredb` Python package available + - a SingleStore DB instance; +""" + +from importlib.util import find_spec + +import pytest +from langchain_core.globals import get_llm_cache, set_llm_cache +from langchain_core.outputs import Generation + +from langchain_community.cache import SingleStoreDBSemanticCache +from tests.integration_tests.cache.fake_embeddings import FakeEmbeddings +from tests.unit_tests.llms.fake_llm import FakeLLM + +TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db" + +singlestoredb_installed = find_spec("singlestoredb") is not None + + +@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") +def test_tinglestoredb_semantic_cache() -> None: + """Test opensearch semantic cache functionality.""" + set_llm_cache( + SingleStoreDBSemanticCache( + embedding=FakeEmbeddings(), + host=TEST_SINGLESTOREDB_URL, + search_threshold=0.0, + ) + ) + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + get_llm_cache().update("foo", llm_string, [Generation(text="fizz")]) + cache_output = get_llm_cache().lookup("bar", llm_string) + assert cache_output == [Generation(text="fizz")] + + get_llm_cache().clear(llm_string=llm_string) + output = get_llm_cache().lookup("bar", llm_string) + assert output != [Generation(text="fizz")] diff --git a/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py b/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py index eb7e4da529..b59ac2dee8 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py @@ -152,7 +152,7 @@ def test_singlestoredb(texts: List[str]) -> None: ) output = docsearch.similarity_search("foo", k=1) assert output == TEST_SINGLE_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -169,7 +169,7 @@ def test_singlestoredb_new_vector(texts: List[str]) -> None: docsearch.add_texts(["foo"]) output = docsearch.similarity_search("foo", k=2) assert output == TEST_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -187,7 +187,7 @@ def test_singlestoredb_euclidean_distance(texts: List[str]) -> None: docsearch.add_texts(["foo"]) output = docsearch.similarity_search("foo", k=2) assert output == TEST_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -207,7 +207,7 @@ def test_singlestoredb_vector_index_1(texts: List[str]) -> None: docsearch.add_texts(["foo"]) output = 
docsearch.similarity_search("foo", k=2) assert output == TEST_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -227,7 +227,7 @@ def test_singlestoredb_vector_index_2(texts: List[str]) -> None: docsearch.add_texts(["foo"]) output = docsearch.similarity_search("foo", k=1) output[0].page_content == "foo" - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -247,7 +247,7 @@ def test_singlestoredb_vector_index_large() -> None: ) output = docsearch.similarity_search("foo", k=1) assert output[0].page_content == "foo" - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -269,7 +269,7 @@ def test_singlestoredb_from_existing(texts: List[str]) -> None: ) output = docsearch2.similarity_search("foo", k=1) assert output == TEST_SINGLE_RESULT - drop(table_name) + docsearch2.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -286,7 +286,7 @@ def test_singlestoredb_from_documents(texts: List[str]) -> None: ) output = docsearch.similarity_search("foo", k=1) assert output == TEST_SINGLE_WITH_METADATA_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -309,7 +309,7 @@ def test_singlestoredb_add_texts_to_existing(texts: List[str]) -> None: docsearch.add_texts(["foo"]) output = docsearch.similarity_search("foo", k=2) assert output == TEST_RESULT - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -329,7 +329,7 @@ def test_singlestoredb_filter_metadata(texts: List[str]) -> None: ) output = docsearch.similarity_search("foo", k=1, filter={"index": 2}) assert output == [Document(page_content="baz", metadata={"index": 2})] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -352,7 +352,7 @@ def test_singlestoredb_filter_metadata_2(texts: List[str]) -> None: assert output == [ Document(page_content="foo", metadata={"index": 0, "category": "budget"}) ] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -377,7 +377,7 @@ def test_singlestoredb_filter_metadata_3(texts: List[str]) -> None: assert output == [ Document(page_content="bar", metadata={"index": 1, "category": "budget"}) ] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -398,7 +398,7 @@ def test_singlestoredb_filter_metadata_4(texts: List[str]) -> None: ) output = docsearch.similarity_search("foo", k=1, filter={"category": "vacation"}) assert output == [] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -437,7 +437,7 @@ def test_singlestoredb_filter_metadata_5(texts: List[str]) -> None: }, ) ] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -468,7 +468,7 @@ def test_singlestoredb_filter_metadata_6(texts: List[str]) -> None: metadata={"index": 1, "category": "budget", "is_good": True}, ) ] - drop(table_name) + docsearch.drop() @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed") @@ -499,7 +499,7 @@ def 
             metadata={"index": 2, "category": "budget", "score": 2.5},
         )
     ]
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -523,7 +523,7 @@ def test_singlestoredb_as_retriever(texts: List[str]) -> None:
             page_content="bar",
         ),
     ]
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -546,7 +546,7 @@ def test_singlestoredb_add_image(texts: List[str]) -> None:
     docsearch.add_images(temp_files)
     output = docsearch.similarity_search("foo", k=1)
     assert output[0].page_content in temp_files
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -571,7 +571,7 @@ def test_singlestoredb_add_image2() -> None:
     docsearch.add_images(image_uris)
     output = docsearch.similarity_search("horse", k=1)
     assert "horse" in output[0].page_content
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -610,7 +610,7 @@ def test_singlestoredb_text_only_search(snow_rain_docs: List[Document]) -> None:
         "Blanketing the countryside in a soft, pristine layer,"
         in output[0].page_content
     )
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -667,7 +667,7 @@ def test_singlestoredb_filter_by_vector_search1(snow_rain_docs: List[Document])
         "High in the mountains, the rain transformed into a delicate"
         in output[0].page_content
     )
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -697,7 +697,7 @@ def test_singlestoredb_filter_by_vector_search2(snow_rain_docs: List[Document])
         "Amidst the bustling cityscape, the rain fell relentlessly"
         in output[0].page_content
     )
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -725,7 +725,7 @@ def test_singlestoredb_weighted_sum_search_unsupported_strategy(
         )
     except ValueError as e:
         assert "Search strategy WEIGHTED_SUM is not" in str(e)
-    drop(table_name)
+    docsearch.drop()


 @pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@@ -752,4 +752,49 @@ def test_singlestoredb_weighted_sum_search(snow_rain_docs: List[Document]) -> No
     assert (
         "Atop the rugged peaks, snow fell with an unyielding" in output[0].page_content
     )
+    docsearch.drop()
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_insert(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_insert"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB(
+        embeddings,
+        table_name=table_name,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    ids = docsearch.add_documents(snow_rain_docs, return_ids=True)
+    assert len(ids) == len(snow_rain_docs)
+    for i, id1 in enumerate(ids):
+        for j, id2 in enumerate(ids):
+            if i != j:
+                assert id1 != id2
+    docsearch.drop()
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_delete(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_delete"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB(
+        embeddings,
+        table_name=table_name,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    ids = docsearch.add_documents(snow_rain_docs, return_ids=True)
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert",
+        k=3,
+        filter={"count": "1"},
+    )
+    assert len(output) == 2
+    docsearch.delete(ids)
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert",
+        k=3,
+    )
+    assert len(output) == 0
+    docsearch.drop()
diff --git a/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py b/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py
index 2033e84b09..c1362205aa 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py
@@ -84,6 +84,7 @@ def test_compatible_vectorstore_documentation() -> None:
         "Rockset",
         "ScaNN",
         "SemaDB",
+        "SingleStoreDB",
         "SupabaseVectorStore",
         "SurrealDBStore",
         "TileDB",