community[patch]: fixed vector similarity filtering (#16967)

**Description:** Changed the filtering so that a document that fails the
filter is no longer added to the results. Currently, filtering is entirely
broken: all documents are returned whether or not they pass the filter.

fixes issue introduced in
https://github.com/langchain-ai/langchain/pull/16190
pull/17435/head
Spencer Kelly 8 months ago committed by GitHub
parent a23c719c8b
commit 54fa78c887
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -333,8 +333,9 @@ class FAISS(VectorStore):
doc = self.docstore.search(_id) doc = self.docstore.search(_id)
if not isinstance(doc, Document): if not isinstance(doc, Document):
raise ValueError(f"Could not find document for id {_id}, got {doc}") raise ValueError(f"Could not find document for id {_id}, got {doc}")
if filter is not None and filter_func(doc.metadata): if filter is not None:
docs.append((doc, scores[0][j])) if filter_func(doc.metadata):
docs.append((doc, scores[0][j]))
else: else:
docs.append((doc, scores[0][j])) docs.append((doc, scores[0][j]))

@ -438,8 +438,9 @@ def test_faiss_with_metadatas_and_filter() -> None:
) )
assert docsearch.docstore.__dict__ == expected_docstore.__dict__ assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = docsearch.similarity_search("foo", k=1, filter={"page": 1}) output = docsearch.similarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})] # make sure it returns the result that matches the filter.
assert output != [Document(page_content="bar", metadata={"page": 1})] # Not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == docsearch.similarity_search( assert output == docsearch.similarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1 "foo", k=1, filter=lambda di: di["page"] == 1
) )
@ -465,8 +466,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
) )
assert docsearch.docstore.__dict__ == expected_docstore.__dict__ assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1}) output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})] # make sure it returns the result that matches the filter.
assert output != [Document(page_content="bar", metadata={"page": 1})] # Not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == await docsearch.asimilarity_search( assert output == await docsearch.asimilarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1 "foo", k=1, filter=lambda di: di["page"] == 1
) )

Loading…
Cancel
Save