community[patch]: Fix pgvector deprecated filter clause usage with OR and AND conditions (#20446)

**Description**: Support filtering by `OR` and `AND` conditions in the deprecated PGVector filter syntax.
**Issue**: #20445 
**Dependencies**: N/A
**Twitter handle**: @martinferenaz
Authored by Martín Gotelli Ferenaz, committed by GitHub
parent c50099161b
commit b48add4353
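For context, the deprecated `PGVector` filter syntax expresses logical operators as nested dictionaries. Below is a minimal usage sketch of the `OR`/`AND` filters this patch fixes; the connection string, collection name, and `FakeEmbeddings` size are placeholder assumptions, not values from this PR.

```python
# Minimal sketch of the OR / AND filter syntax fixed by this patch for the
# deprecated PGVector class. Connection string, collection name, and the
# FakeEmbeddings size below are placeholder assumptions.
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores.pgvector import PGVector

store = PGVector.from_texts(
    texts=["foo", "bar", "baz"],
    metadatas=[{"page": "0"}, {"page": "1"}, {"page": "2"}],
    embedding=FakeEmbeddings(size=1536),
    collection_name="demo_collection",
    connection_string="postgresql+psycopg2://postgres:postgres@localhost:5432/postgres",
)

# OR: keep documents whose "page" metadata equals "0" or "2".
or_hits = store.similarity_search_with_score(
    "foo", k=3, filter={"page": {"OR": [{"EQ": "0"}, {"EQ": "2"}]}}
)

# AND: "page" must be in {"0", "1"} and not in {"1"}.
and_hits = store.similarity_search_with_score(
    "foo", k=3, filter={"page": {"AND": [{"IN": ["0", "1"]}, {"NIN": ["1"]}]}}
)
```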

@@ -795,13 +795,13 @@ class PGVector(VectorStore):
             )
         elif OR in map(str.lower, value):
             or_clauses = [
-                self._create_filter_clause(key, sub_value)
+                self._create_filter_clause_deprecated(key, sub_value)
                 for sub_value in value_case_insensitive[OR]
             ]
             filter_by_metadata = sqlalchemy.or_(*or_clauses)
         elif AND in map(str.lower, value):
             and_clauses = [
-                self._create_filter_clause(key, sub_value)
+                self._create_filter_clause_deprecated(key, sub_value)
                 for sub_value in value_case_insensitive[AND]
             ]
             filter_by_metadata = sqlalchemy.and_(*and_clauses)
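The hunk above is the whole fix: when an `OR` or `AND` operator is found, each sub-condition is now expanded by the deprecated clause builder itself (recursively) rather than by the newer `_create_filter_clause`, and the resulting clauses are combined with `sqlalchemy.or_()` / `sqlalchemy.and_()`. A simplified, self-contained sketch of that recursion pattern follows; it is not the actual PGVector implementation, and the column handling and operator set are trimmed for illustration.

```python
# Standalone sketch of the recursive clause building the patch restores.
# `column` is assumed to be a SQLAlchemy JSONB column; only a few of the
# deprecated operators are shown.
import sqlalchemy


def build_clause_deprecated(column, key, value):
    if isinstance(value, dict):
        value_ci = {k.lower(): v for k, v in value.items()}
        if "or" in value_ci:
            # Recurse with the *same* deprecated builder for every sub-condition.
            return sqlalchemy.or_(
                *(build_clause_deprecated(column, key, sub) for sub in value_ci["or"])
            )
        if "and" in value_ci:
            return sqlalchemy.and_(
                *(build_clause_deprecated(column, key, sub) for sub in value_ci["and"])
            )
        if "eq" in value_ci:
            return column[key].astext == str(value_ci["eq"])
        if "in" in value_ci:
            return column[key].astext.in_([str(v) for v in value_ci["in"]])
        if "nin" in value_ci:
            return column[key].astext.not_in([str(v) for v in value_ci["nin"]])
    # Bare scalar: plain equality on the JSONB field.
    return column[key].astext == str(value)
```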

@@ -227,6 +227,45 @@ def test_pgvector_with_filter_nin_set() -> None:
     ]
 
 
+def test_pg_vector_with_or_filter() -> None:
+    """Test end to end construction and search with specific OR filter."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = PGVector.from_texts(
+        texts=texts,
+        collection_name="test_collection_filter",
+        embedding=FakeEmbeddingsWithAdaDimension(),
+        metadatas=metadatas,
+        connection_string=CONNECTION_STRING,
+        pre_delete_collection=True,
+    )
+    output = docsearch.similarity_search_with_score(
+        "foo", k=3, filter={"page": {"OR": [{"EQ": "0"}, {"EQ": "2"}]}}
+    )
+    assert output == [
+        (Document(page_content="foo", metadata={"page": "0"}), 0.0),
+        (Document(page_content="baz", metadata={"page": "2"}), 0.0013003906671379406),
+    ]
+
+
+def test_pg_vector_with_and_filter() -> None:
+    """Test end to end construction and search with specific AND filter."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = PGVector.from_texts(
+        texts=texts,
+        collection_name="test_collection_filter",
+        embedding=FakeEmbeddingsWithAdaDimension(),
+        metadatas=metadatas,
+        connection_string=CONNECTION_STRING,
+        pre_delete_collection=True,
+    )
+    output = docsearch.similarity_search_with_score(
+        "foo", k=3, filter={"page": {"AND": [{"IN": ["0", "1"]}, {"NIN": ["1"]}]}}
+    )
+    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
+
+
 def test_pgvector_delete_docs() -> None:
     """Add and delete documents."""
     texts = ["foo", "bar", "baz"]
