diff --git a/libs/langchain/langchain/vectorstores/faiss.py b/libs/langchain/langchain/vectorstores/faiss.py
index e59aa73178..6a23fc8ae8 100644
--- a/libs/langchain/langchain/vectorstores/faiss.py
+++ b/libs/langchain/langchain/vectorstores/faiss.py
@@ -370,7 +370,12 @@ class FAISS(VectorStore):
                 doc = self.docstore.search(_id)
                 if not isinstance(doc, Document):
                     raise ValueError(f"Could not find document for id {_id}, got {doc}")
-                if all(doc.metadata.get(key) == value for key, value in filter.items()):
+                if all(
+                    doc.metadata.get(key) in value
+                    if isinstance(value, list)
+                    else doc.metadata.get(key) == value
+                    for key, value in filter.items()
+                ):
                     filtered_indices.append(i)
             indices = np.array([filtered_indices])
         # -1 happens when not enough docs are returned.
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_faiss.py b/libs/langchain/tests/integration_tests/vectorstores/test_faiss.py
index 98f6384cd1..b1df949045 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_faiss.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_faiss.py
@@ -47,6 +47,24 @@ def test_faiss_vector_sim() -> None:
     assert output == [Document(page_content="foo")]
 
 
+def test_faiss_vector_sim_with_score_threshold() -> None:
+    """Test vector similarity with a score threshold."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings())
+    index_to_id = docsearch.index_to_docstore_id
+    expected_docstore = InMemoryDocstore(
+        {
+            index_to_id[0]: Document(page_content="foo"),
+            index_to_id[1]: Document(page_content="bar"),
+            index_to_id[2]: Document(page_content="baz"),
+        }
+    )
+    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.similarity_search_by_vector(query_vec, k=2, score_threshold=0.2)
+    assert output == [Document(page_content="foo")]
+
+
 def test_similarity_search_with_score_by_vector() -> None:
     """Test vector similarity with score by vector."""
     texts = ["foo", "bar", "baz"]
@@ -66,6 +84,30 @@ def test_similarity_search_with_score_by_vector() -> None:
     assert output[0][0] == Document(page_content="foo")
 
 
+def test_similarity_search_with_score_by_vector_with_score_threshold() -> None:
+    """Test vector similarity with score by vector and a score threshold."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings())
+    index_to_id = docsearch.index_to_docstore_id
+    expected_docstore = InMemoryDocstore(
+        {
+            index_to_id[0]: Document(page_content="foo"),
+            index_to_id[1]: Document(page_content="bar"),
+            index_to_id[2]: Document(page_content="baz"),
+        }
+    )
+    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
+    query_vec = FakeEmbeddings().embed_query(text="foo")
+    output = docsearch.similarity_search_with_score_by_vector(
+        query_vec,
+        k=2,
+        score_threshold=0.2,
+    )
+    assert len(output) == 1
+    assert output[0][0] == Document(page_content="foo")
+    assert output[0][1] < 0.2
+
+
 def test_faiss_mmr() -> None:
     texts = ["foo", "foo", "fou", "foy"]
     docsearch = FAISS.from_texts(texts, FakeEmbeddings())
@@ -102,10 +144,9 @@ def test_faiss_mmr_with_metadatas_and_filter() -> None:
     output = docsearch.max_marginal_relevance_search_with_score_by_vector(
         query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
     )
-    assert len(output) == len(texts)
+    assert len(output) == 1
     assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
     assert output[0][1] == 0.0
-    assert output[1][0] != Document(page_content="foo", metadata={"page": 1})
 
 
 def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
@@ -116,7 +157,7 @@ def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
     output = docsearch.max_marginal_relevance_search_with_score_by_vector(
         query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
     )
-    assert len(output) == len(texts)
+    assert len(output) == 3
     assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
     assert output[0][1] == 0.0
     assert output[1][0] != Document(page_content="foo", metadata={"page": 0})