mirror of https://github.com/hwchase17/langchain
Added SingleStoreDB Vector Store (#5619)
- Added `SingleStoreDB` vector store, which is a wrapper over the SingleStore DB database, that can be used as a vector storage and has an efficient similarity search. - Added integration tests for the vector store - Added jupyter notebook with the example @dev2049 --------- Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com> Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>pull/5864/head^2
parent
78aa59c68b
commit
a1549901ce
@ -0,0 +1,139 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b9582dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreDB vector search\n",
|
||||
"[SingleStore DB](https://singlestore.com) is a high-performance distributed database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premises. For a significant duration, it has provided support for vector functions such as [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html), thereby positioning itself as an ideal solution for AI applications that require text similarity matching. \n",
|
||||
"This tutorial illustrates how to utilize the features of the SingleStore DB Vector Store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e4a61a4d",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Establishing a connection to the database is facilitated through the singlestoredb Python connector.\n",
|
||||
"# Please ensure that this connector is installed in your working environment.\n",
|
||||
"!pip install singlestoredb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "39a0132a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6104fde8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import SingleStoreDB\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b45113c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load text samples \n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "535b2687",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several ways to establish a [connection](https://singlestoredb-python.labs.singlestore.com/generated/singlestoredb.connect.html) to the database. You can either set up environment variables or pass named parameters to the `SingleStoreDB constructor`. Alternatively, you may provide these parameters to the `from_documents` and `from_texts` methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0b316bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup connection url as environment variable\n",
|
||||
"os.environ['SINGLESTOREDB_URL'] = 'root:pass@localhost:3306/db'\n",
|
||||
"\n",
|
||||
"# Load documents to the store\n",
|
||||
"docsearch = SingleStoreDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" table_name = \"noteook\", # use table with a custom name \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0eaa4297",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query) # Find documents that correspond to the query\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86efff90",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -0,0 +1,372 @@
|
||||
"""Wrapper around SingleStore DB."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import (
|
||||
Any,
|
||||
ClassVar,
|
||||
Collection,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
)
|
||||
|
||||
from sqlalchemy.pool import QueuePool
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
|
||||
|
||||
class SingleStoreDB(VectorStore):
    """
    This class serves as a Pythonic interface to the SingleStore DB database.
    The prerequisite for using this class is the installation of the ``singlestoredb``
    Python package.

    The SingleStoreDB vectorstore can be created by providing an embedding function and
    the relevant parameters for the database connection, connection pool, and
    optionally, the names of the table and the fields to use.
    """

    def _get_connection(self: SingleStoreDB) -> Any:
        """Open a new database connection from ``self.connection_kwargs``.

        Used as the connection factory for the SQLAlchemy ``QueuePool`` built
        in ``__init__``.

        Raises:
            ImportError: If the ``singlestoredb`` package is not installed.
        """
        try:
            import singlestoredb as s2
        except ImportError:
            raise ImportError(
                "Could not import singlestoredb python package. "
                "Please install it with `pip install singlestoredb`."
            )
        return s2.connect(**self.connection_kwargs)

    def __init__(
        self,
        embedding: Embeddings,
        *,
        table_name: str = "embeddings",
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
        **kwargs: Any,
    ) -> None:
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.

            table_name (str, optional): Specifies the name of the table in use.
                Defaults to "embeddings".
            content_field (str, optional): Specifies the field to store the content.
                Defaults to "content".
            metadata_field (str, optional): Specifies the field to store metadata.
                Defaults to "metadata".
            vector_field (str, optional): Specifies the field to store the vector.
                Defaults to "vector".

            Following arguments pertain to the connection pool:

            pool_size (int, optional): Determines the number of active connections in
                the pool. Defaults to 5.
            max_overflow (int, optional): Determines the maximum number of connections
                allowed beyond the pool_size. Defaults to 10.
            timeout (float, optional): Specifies the maximum wait time in seconds for
                establishing a connection. Defaults to 30.

            Following arguments pertain to the database connection:

            host (str, optional): Specifies the hostname, IP address, or URL for the
                database connection. The default scheme is "mysql".
            user (str, optional): Database username.
            password (str, optional): Database password.
            port (int, optional): Database port. Defaults to 3306 for non-HTTP
                connections, 80 for HTTP connections, and 443 for HTTPS connections.
            database (str, optional): Database name.

            Additional optional arguments provide further customization over the
            database connection:

            pure_python (bool, optional): Toggles the connector mode. If True,
                operates in pure Python mode.
            local_infile (bool, optional): Allows local file uploads.
            charset (str, optional): Specifies the character set for string values.
            ssl_key (str, optional): Specifies the path of the file containing the SSL
                key.
            ssl_cert (str, optional): Specifies the path of the file containing the SSL
                certificate.
            ssl_ca (str, optional): Specifies the path of the file containing the SSL
                certificate authority.
            ssl_cipher (str, optional): Sets the SSL cipher list.
            ssl_disabled (bool, optional): Disables SSL usage.
            ssl_verify_cert (bool, optional): Verifies the server's certificate.
                Automatically enabled if ``ssl_ca`` is specified.
            ssl_verify_identity (bool, optional): Verifies the server's identity.
            conv (dict[int, Callable], optional): A dictionary of data conversion
                functions.
            credential_type (str, optional): Specifies the type of authentication to
                use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
            autocommit (bool, optional): Enables autocommits.
            results_type (str, optional): Determines the structure of the query results:
                tuples, namedtuples, dicts.
            results_format (str, optional): Deprecated. This option has been renamed to
                results_type.

        Examples:
            Basic Usage:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
                    OpenAIEmbeddings(),
                    host="https://user:password@127.0.0.1:3306/database"
                )

            Advanced Usage:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
                    OpenAIEmbeddings(),
                    host="127.0.0.1",
                    port=3306,
                    user="user",
                    password="password",
                    database="db",
                    table_name="my_custom_table",
                    pool_size=10,
                    timeout=60,
                )

            Using environment variables:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
                vectorstore = SingleStoreDB(OpenAIEmbeddings())
        """

        self.embedding = embedding
        self.table_name = table_name
        self.content_field = content_field
        self.metadata_field = metadata_field
        self.vector_field = vector_field

        """Pass the rest of the kwargs to the connection."""
        self.connection_kwargs = kwargs

        """Create connection pool."""
        # Pooled connections are created lazily by _get_connection.
        self.connection_pool = QueuePool(
            self._get_connection,
            max_overflow=max_overflow,
            pool_size=pool_size,
            timeout=timeout,
        )
        # Eagerly ensure the backing table exists; this opens a connection
        # at construction time, so the database must be reachable here.
        self._create_table()

    def _create_table(self: SingleStoreDB) -> None:
        """Create table if it doesn't exist."""
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
                # NOTE(review): table/field names are interpolated directly into
                # the SQL text (identifiers cannot be bound as parameters); they
                # must come from trusted configuration, never from user input.
                cur.execute(
                    """CREATE TABLE IF NOT EXISTS {}
                    ({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
                    {} BLOB, {} JSON);""".format(
                        self.table_name,
                        self.content_field,
                        self.vector_field,
                        self.metadata_field,
                    ),
                )
            finally:
                cur.close()
        finally:
            conn.close()

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        embeddings: Optional[List[List[float]]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add more texts to the vectorstore.

        Args:
            texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
                Defaults to None.
            embeddings (Optional[List[List[float]]], optional): Optional pre-generated
                embeddings. Defaults to None.

        Returns:
            List[str]: empty list
        """
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
                # Write data to singlestore db
                for i, text in enumerate(texts):
                    # Use provided values by default or fallback
                    metadata = metadatas[i] if metadatas else {}
                    # When no precomputed embeddings are given, each text is
                    # embedded individually — one model call per row.
                    embedding = (
                        embeddings[i]
                        if embeddings
                        else self.embedding.embed_documents([text])[0]
                    )
                    # The vector is serialized as a JSON array string and
                    # packed into the BLOB column via JSON_ARRAY_PACK.
                    cur.execute(
                        "INSERT INTO {} VALUES (%s, JSON_ARRAY_PACK(%s), %s)".format(
                            self.table_name
                        ),
                        (
                            text,
                            "[{}]".format(",".join(map(str, embedding))),
                            json.dumps(metadata),
                        ),
                    )
            finally:
                cur.close()
        finally:
            conn.close()
        return []

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Returns the most similar indexed documents to the query text.

        Uses cosine similarity.

        Args:
            query (str): The query text for which to find similar documents.
            k (int): The number of documents to return. Default is 4.

        Returns:
            List[Document]: A list of documents that are most similar to the query text.
        """
        # Delegate to the scored variant and strip the scores.
        docs_and_scores = self.similarity_search_with_score(query, k=k)
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_score(
        self, query: str, k: int = 4
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query. Uses cosine similarity.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of Documents most similar to the query and score for each
        """
        # Creates embedding vector from user query
        embedding = self.embedding.embed_query(query)
        conn = self.connection_pool.connect()
        result = []
        try:
            cur = conn.cursor()
            try:
                # NOTE(review): the score is a raw DOT_PRODUCT; it equals cosine
                # similarity only if stored and query vectors are unit-normalized
                # by the embedding model — confirm for the model in use.
                cur.execute(
                    """SELECT {}, {}, DOT_PRODUCT({}, JSON_ARRAY_PACK(%s)) as __score
                    FROM {} ORDER BY __score DESC LIMIT %s""".format(
                        self.content_field,
                        self.metadata_field,
                        self.vector_field,
                        self.table_name,
                    ),
                    (
                        "[{}]".format(",".join(map(str, embedding))),
                        k,
                    ),
                )

                # Row layout follows the SELECT list: content, metadata, score.
                for row in cur.fetchall():
                    doc = Document(page_content=row[0], metadata=row[1])
                    result.append((doc, float(row[2])))
            finally:
                cur.close()
        finally:
            conn.close()
        return result

    @classmethod
    def from_texts(
        cls: Type[SingleStoreDB],
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        table_name: str = "embeddings",
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
        **kwargs: Any,
    ) -> SingleStoreDB:
        """Create a SingleStoreDB vectorstore from raw documents.
        This is a user-friendly interface that:
        1. Embeds documents.
        2. Creates a new table for the embeddings in SingleStoreDB.
        3. Adds the documents to the newly created table.
        This is intended to be a quick way to get started.
        Example:
            .. code-block:: python
                from langchain.vectorstores import SingleStoreDB
                from langchain.embeddings import OpenAIEmbeddings
                s2 = SingleStoreDB.from_texts(
                    texts,
                    OpenAIEmbeddings(),
                    host="username:password@localhost:3306/database"
                )
        """

        instance = cls(
            embedding,
            table_name=table_name,
            content_field=content_field,
            metadata_field=metadata_field,
            vector_field=vector_field,
            pool_size=pool_size,
            max_overflow=max_overflow,
            timeout=timeout,
            **kwargs,
        )
        # Embeddings are computed for the whole batch up front, so add_texts
        # does not re-embed each text individually.
        instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
        return instance

    def as_retriever(self, **kwargs: Any) -> SingleStoreDBRetriever:
        """Return a retriever backed by this vectorstore."""
        return SingleStoreDBRetriever(vectorstore=self, **kwargs)
|
||||
|
||||
|
||||
class SingleStoreDBRetriever(VectorStoreRetriever):
    """Retriever that answers queries via a ``SingleStoreDB`` similarity search."""

    vectorstore: SingleStoreDB
    k: int = 4
    allowed_search_types: ClassVar[Collection[str]] = ("similarity",)

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Return the top-``k`` documents most similar to *query*."""
        # Guard clause: only similarity search is supported.
        if self.search_type != "similarity":
            raise ValueError(f"search_type of {self.search_type} not allowed.")
        return self.vectorstore.similarity_search(query, k=self.k)

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Async retrieval is not supported for this retriever."""
        raise NotImplementedError(
            "SingleStoreDBVectorStoreRetriever does not support async"
        )
|
@ -0,0 +1,142 @@
|
||||
"""Test SingleStoreDB functionality."""
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores.singlestoredb import SingleStoreDB
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
# Connection URL of the local SingleStore instance the integration tests expect.
TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db"
# Expected search results shared by the assertions below.
TEST_SINGLE_RESULT = [Document(page_content="foo")]
TEST_SINGLE_WITH_METADATA_RESULT = [Document(page_content="foo", metadata={"a": "b"})]
TEST_RESULT = [Document(page_content="foo"), Document(page_content="foo")]
|
||||
|
||||
# Probe for the optional singlestoredb dependency; tests below are skipped
# when it is absent.
try:
    import singlestoredb as s2
except ImportError:
    singlestoredb_installed = False
else:
    singlestoredb_installed = True
|
||||
|
||||
|
||||
def drop(table_name: str) -> None:
    """Remove *table_name* from the test database if it exists."""
    statement = f"DROP TABLE IF EXISTS {table_name};"
    with s2.connect(TEST_SINGLESTOREDB_URL) as connection:
        # Autocommit so the DROP takes effect without an explicit commit.
        connection.autocommit(True)
        with connection.cursor() as cur:
            cur.execute(statement)
|
||||
|
||||
|
||||
class NormilizedFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings with normalization. For testing purposes."""

    def normalize(self, vector: List[float]) -> List[float]:
        """Scale *vector* to unit Euclidean length."""
        norm = np.linalg.norm(vector)
        return [float(component / norm) for component in vector]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text, then normalize every resulting vector."""
        raw_vectors = super().embed_documents(texts)
        return [self.normalize(vec) for vec in raw_vectors]

    def embed_query(self, text: str) -> List[float]:
        """Embed the query text and normalize the result."""
        return self.normalize(super().embed_query(text))
|
||||
|
||||
|
||||
@pytest.fixture
def texts() -> List[str]:
    """Three sample texts shared by the integration tests."""
    return "foo bar baz".split()
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb(texts: List[str]) -> None:
    """End to end: build a store from texts and run a top-1 search."""
    table_name = "test_singlestoredb"
    drop(table_name)
    store = SingleStoreDB.from_texts(
        texts,
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    assert store.similarity_search("foo", k=1) == TEST_SINGLE_RESULT
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_new_vector(texts: List[str]) -> None:
    """Test adding a new document"""
    table_name = "test_singlestoredb_new_vector"
    drop(table_name)
    store = SingleStoreDB.from_texts(
        texts,
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    # A second "foo" row means a k=2 search should surface both copies.
    store.add_texts(["foo"])
    assert store.similarity_search("foo", k=2) == TEST_RESULT
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_from_existing(texts: List[str]) -> None:
    """Test attaching a second vectorstore to an already-populated table."""
    table_name = "test_singlestoredb_from_existing"
    drop(table_name)
    # Populate the table through the normal construction path.
    SingleStoreDB.from_texts(
        texts,
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    # Test creating from an existing table. Reuse the table_name variable
    # (the original repeated the literal, which could silently drift).
    docsearch2 = SingleStoreDB(
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    output = docsearch2.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_RESULT
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_from_documents(texts: List[str]) -> None:
    """Test from_documents constructor."""
    table_name = "test_singlestoredb_from_documents"
    drop(table_name)
    sample_docs = [Document(page_content=t, metadata={"a": "b"}) for t in texts]
    store = SingleStoreDB.from_documents(
        sample_docs,
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    # Metadata attached at load time must round-trip through the store.
    assert store.similarity_search("foo", k=1) == TEST_SINGLE_WITH_METADATA_RESULT
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_add_texts_to_existing(texts: List[str]) -> None:
    """Test adding a new document"""
    table_name = "test_singlestoredb_add_texts_to_existing"
    drop(table_name)
    # Test creating from an existing
    SingleStoreDB.from_texts(
        texts,
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    # Attach a fresh store instance to the pre-populated table.
    store = SingleStoreDB(
        NormilizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    store.add_texts(["foo"])
    assert store.similarity_search("foo", k=2) == TEST_RESULT
    drop(table_name)
|
Loading…
Reference in New Issue