cab7d86f23
Hi there,

This pull request contains two commits:

**1. Implement a `delete` interface with an optional `ids` parameter on AnalyticDB.**

**2. Allow customization of database connection behavior by exposing an `engine_args` parameter in the interfaces.**

- This commit adds the `engine_args` parameter to the interfaces, letting users customize the behavior of the database connection. `engine_args` accepts a dictionary of additional keyword arguments that is passed through to the `create_engine` function, so users can tune aspects of the connection such as pool size and recycle time. This gives users more flexibility and control when interacting with the database through the exposed interfaces.

This change is related to VectorStores. @rlancemartin @eyurtsev

Thank you for your attention and consideration.
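For context, here is a minimal sketch of how the two changes might be used together. The collection name, ids, and example texts below are placeholders, and any `Embeddings` implementation can stand in for `OpenAIEmbeddings`:

```python
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.analyticdb import AnalyticDB

# engine_args is passed through to SQLAlchemy's create_engine, so any
# keyword it accepts (pool_size, pool_recycle, ...) can be tuned here.
engine_args = {"pool_size": 50, "pool_recycle": 3600}

connection_string = AnalyticDB.connection_string_from_db_params(
    driver="psycopg2cffi",
    host=os.environ.get("PG_HOST", "localhost"),
    port=5432,
    database="postgres",
    user="postgres",
    password=os.environ.get("PG_PASSWORD", "postgres"),
)

vectorstore = AnalyticDB.from_texts(
    texts=["foo", "bar"],  # placeholder texts
    embedding=OpenAIEmbeddings(),  # any Embeddings implementation works
    collection_name="demo_collection",  # placeholder collection name
    connection_string=connection_string,
    ids=["foo-id", "bar-id"],  # placeholder ids
    engine_args=engine_args,
)

# The new delete interface removes stored documents by id.
vectorstore.delete(ids=["foo-id"])
```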
167 lines · 6.0 KiB · Python
"""Test PGVector functionality."""
|
|
import os
|
|
from typing import List
|
|
|
|
from langchain.docstore.document import Document
|
|
from langchain.vectorstores.analyticdb import AnalyticDB
|
|
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
|
|
|
CONNECTION_STRING = AnalyticDB.connection_string_from_db_params(
    driver=os.environ.get("PG_DRIVER", "psycopg2cffi"),
    host=os.environ.get("PG_HOST", "localhost"),
    port=int(os.environ.get("PG_PORT", "5432")),
    database=os.environ.get("PG_DATABASE", "postgres"),
    user=os.environ.get("PG_USER", "postgres"),
    password=os.environ.get("PG_PASSWORD", "postgres"),
)

ADA_TOKEN_COUNT = 1536


class FakeEmbeddingsWithAdaDimension(FakeEmbeddings):
    """Fake embeddings functionality for testing."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return simple embeddings."""
        return [
            [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(i)] for i in range(len(texts))
        ]

    def embed_query(self, text: str) -> List[float]:
        """Return simple embeddings."""
        return [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(0.0)]


def test_analyticdb() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection",
        embedding=FakeEmbeddingsWithAdaDimension(),
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]


def test_analyticdb_with_engine_args() -> None:
    """Test end to end construction and search with custom engine arguments."""
    engine_args = {"pool_recycle": 3600, "pool_size": 50}
    texts = ["foo", "bar", "baz"]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection",
        embedding=FakeEmbeddingsWithAdaDimension(),
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
        engine_args=engine_args,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]


def test_analyticdb_with_metadatas() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo", metadata={"page": "0"})]


def test_analyticdb_with_metadatas_with_scores() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_score("foo", k=1)
    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]


def test_analyticdb_with_filter_match() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection_filter",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"})
    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]


def test_analyticdb_with_filter_distant_match() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection_filter",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
    print(output)
    assert output == [(Document(page_content="baz", metadata={"page": "2"}), 4.0)]


def test_analyticdb_with_filter_no_match() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection_filter",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "5"})
    assert output == []


def test_analyticdb_delete() -> None:
    """Test end to end construction, search, and deletion by id."""
    texts = ["foo", "bar", "baz"]
    ids = ["fooid", "barid", "bazid"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = AnalyticDB.from_texts(
        texts=texts,
        collection_name="test_collection_delete",
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=CONNECTION_STRING,
        ids=ids,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
    print(output)
    assert output == [(Document(page_content="baz", metadata={"page": "2"}), 4.0)]
    docsearch.delete(ids=ids)
    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
    assert output == []