community[patch]: fixed vector similarity filtering (#16967)

**Description:** Changed filtering so that a document that fails the filter
is no longer added to the results. Currently filtering is entirely broken:
all documents are returned whether or not they pass the filter.

Fixes an issue introduced in
https://github.com/langchain-ai/langchain/pull/16190
pull/17435/head
Spencer Kelly 5 months ago committed by GitHub
parent a23c719c8b
commit 54fa78c887
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -333,8 +333,9 @@ class FAISS(VectorStore):
doc = self.docstore.search(_id)
if not isinstance(doc, Document):
raise ValueError(f"Could not find document for id {_id}, got {doc}")
if filter is not None and filter_func(doc.metadata):
docs.append((doc, scores[0][j]))
if filter is not None:
if filter_func(doc.metadata):
docs.append((doc, scores[0][j]))
else:
docs.append((doc, scores[0][j]))

@ -438,8 +438,9 @@ def test_faiss_with_metadatas_and_filter() -> None:
)
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = docsearch.similarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert output != [Document(page_content="bar", metadata={"page": 1})]
# make sure it returns the result that matches the filter.
# Not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == docsearch.similarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1
)
@ -465,8 +466,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
)
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert output != [Document(page_content="bar", metadata={"page": 1})]
# make sure it returns the result that matches the filter.
# Not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == await docsearch.asimilarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1
)

Loading…
Cancel
Save