diff --git a/tests/integration_tests/vectorstores/test_faiss.py b/tests/integration_tests/vectorstores/test_faiss.py
index e19f6e08b5..98f6384cd1 100644
--- a/tests/integration_tests/vectorstores/test_faiss.py
+++ b/tests/integration_tests/vectorstores/test_faiss.py
@@ -47,6 +47,25 @@ def test_faiss_vector_sim() -> None:
     assert output == [Document(page_content="foo")]
 
 
+def test_similarity_search_with_score_by_vector() -> None:
+    """Test vector similarity with score by vector."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings())
+    index_to_id = docsearch.index_to_docstore_id
+    expected_docstore = InMemoryDocstore(
+        {
+            index_to_id[0]: Document(page_content="foo"),
+            index_to_id[1]: Document(page_content="bar"),
+            index_to_id[2]: Document(page_content="baz"),
+        }
+    )
+    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.similarity_search_with_score_by_vector(query_vec, k=1)
+    assert len(output) == 1
+    assert output[0][0] == Document(page_content="foo")
+
+
 def test_faiss_mmr() -> None:
     texts = ["foo", "foo", "fou", "foy"]
     docsearch = FAISS.from_texts(texts, FakeEmbeddings())
@@ -61,6 +80,52 @@ def test_faiss_mmr() -> None:
     assert output[1][0] != Document(page_content="foo")
 
 
+def test_faiss_mmr_with_metadatas() -> None:
+    """Test MMR search by vector when documents carry metadata."""
+    texts = ["foo", "foo", "fou", "foy"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.max_marginal_relevance_search_with_score_by_vector(
+        query_vec, k=10, lambda_mult=0.1
+    )
+    assert len(output) == len(texts)
+    assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
+    assert output[0][1] == 0.0
+    assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
+
+
+def test_faiss_mmr_with_metadatas_and_filter() -> None:
+    """Test MMR search by vector with a single-value metadata filter."""
+    texts = ["foo", "foo", "fou", "foy"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.max_marginal_relevance_search_with_score_by_vector(
+        query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
+    )
+    # Only one document has page == 1, so the filter must narrow the result to it.
+    assert len(output) == 1
+    assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
+    assert output[0][1] == 0.0
+
+
+def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
+    """Test MMR search by vector with a list-valued metadata filter."""
+    texts = ["foo", "foo", "fou", "foy"]
+    metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.max_marginal_relevance_search_with_score_by_vector(
+        query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
+    )
+    # Pages 0, 1 and 2 pass the list filter; the document on page 3 is excluded.
+    assert len(output) == 3
+    assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
+    assert output[0][1] == 0.0
+    assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
+
+
 def test_faiss_with_metadatas() -> None:
     """Test end to end construction and search."""
     texts = ["foo", "bar", "baz"]