From 54fa78c887c790bdcd90ab85649bc110a3542517 Mon Sep 17 00:00:00 2001 From: Spencer Kelly Date: Mon, 12 Feb 2024 14:52:57 -0800 Subject: [PATCH] community[patch]: fixed vector similarity filtering (#16967) **Description:** Changed filtering so that a document that fails the filter is no longer added to the results. Currently, filtering is entirely broken: all documents are returned whether or not they pass the filter. Fixes an issue introduced in https://github.com/langchain-ai/langchain/pull/16190 --- .../langchain_community/vectorstores/faiss.py | 5 +++-- .../tests/unit_tests/vectorstores/test_faiss.py | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/faiss.py b/libs/community/langchain_community/vectorstores/faiss.py index 06f28d385c..1617a8c36b 100644 --- a/libs/community/langchain_community/vectorstores/faiss.py +++ b/libs/community/langchain_community/vectorstores/faiss.py @@ -333,8 +333,9 @@ class FAISS(VectorStore): doc = self.docstore.search(_id) if not isinstance(doc, Document): raise ValueError(f"Could not find document for id {_id}, got {doc}") - if filter is not None and filter_func(doc.metadata): - docs.append((doc, scores[0][j])) + if filter is not None: + if filter_func(doc.metadata): + docs.append((doc, scores[0][j])) else: docs.append((doc, scores[0][j])) diff --git a/libs/community/tests/unit_tests/vectorstores/test_faiss.py b/libs/community/tests/unit_tests/vectorstores/test_faiss.py index 375fa00eb6..cedecc8ada 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_faiss.py +++ b/libs/community/tests/unit_tests/vectorstores/test_faiss.py @@ -438,8 +438,9 @@ def test_faiss_with_metadatas_and_filter() -> None: ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = docsearch.similarity_search("foo", k=1, filter={"page": 1}) - assert output == [Document(page_content="foo", metadata={"page": 0})] - assert output != [Document(page_content="bar", metadata={"page": 1})] 
+ # make sure it returns the result that matches the filter. + # Not the one whose text matches better. + assert output == [Document(page_content="bar", metadata={"page": 1})] assert output == docsearch.similarity_search( "foo", k=1, filter=lambda di: di["page"] == 1 ) @@ -465,8 +466,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None: ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1}) - assert output == [Document(page_content="foo", metadata={"page": 0})] - assert output != [Document(page_content="bar", metadata={"page": 1})] + # make sure it returns the result that matches the filter. + # Not the one whose text matches better. + assert output == [Document(page_content="bar", metadata={"page": 1})] assert output == await docsearch.asimilarity_search( "foo", k=1, filter=lambda di: di["page"] == 1 )