"""Test Hologres functionality.""" import os from typing import List from langchain_core.documents import Document from langchain_community.vectorstores.hologres import Hologres from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings CONNECTION_STRING = Hologres.connection_string_from_db_params( host=os.environ.get("TEST_HOLOGRES_HOST", "localhost"), port=int(os.environ.get("TEST_HOLOGRES_PORT", "80")), database=os.environ.get("TEST_HOLOGRES_DATABASE", "postgres"), user=os.environ.get("TEST_HOLOGRES_USER", "postgres"), password=os.environ.get("TEST_HOLOGRES_PASSWORD", "postgres"), ) ADA_TOKEN_COUNT = 1536 class FakeEmbeddingsWithAdaDimension(FakeEmbeddings): """Fake embeddings functionality for testing.""" def embed_documents(self, texts: List[str]) -> List[List[float]]: """Return simple embeddings.""" return [ [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(i)] for i in range(len(texts)) ] def embed_query(self, text: str) -> List[float]: """Return simple embeddings.""" return [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(0.0)] def test_hologres() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = Hologres.from_texts( texts=texts, table_name="test_table", embedding=FakeEmbeddingsWithAdaDimension(), connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] def test_hologres_embeddings() -> None: """Test end to end construction with embeddings and search.""" texts = ["foo", "bar", "baz"] text_embeddings = FakeEmbeddingsWithAdaDimension().embed_documents(texts) text_embedding_pairs = list(zip(texts, text_embeddings)) docsearch = Hologres.from_embeddings( text_embeddings=text_embedding_pairs, table_name="test_table", embedding=FakeEmbeddingsWithAdaDimension(), connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] def test_hologres_with_metadatas() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Hologres.from_texts( texts=texts, table_name="test_table", embedding=FakeEmbeddingsWithAdaDimension(), metadatas=metadatas, connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo", metadata={"page": "0"})] def test_hologres_with_metadatas_with_scores() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Hologres.from_texts( texts=texts, table_name="test_table", embedding=FakeEmbeddingsWithAdaDimension(), metadatas=metadatas, connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search_with_score("foo", k=1) assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)] def test_hologres_with_filter_match() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Hologres.from_texts( texts=texts, table_name="test_table_filter", embedding=FakeEmbeddingsWithAdaDimension(), metadatas=metadatas, connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"}) assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)] def test_hologres_with_filter_distant_match() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Hologres.from_texts( texts=texts, table_name="test_table_filter", embedding=FakeEmbeddingsWithAdaDimension(), metadatas=metadatas, connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"}) assert output == [(Document(page_content="baz", metadata={"page": "2"}), 4.0)] def test_hologres_with_filter_no_match() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Hologres.from_texts( texts=texts, table_name="test_table_filter", embedding=FakeEmbeddingsWithAdaDimension(), metadatas=metadatas, connection_string=CONNECTION_STRING, pre_delete_table=True, ) output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "5"}) assert output == []