Feature: Qdrant filters support (#5446)

# Support Qdrant filters

Qdrant has an [extensive filtering
system](https://qdrant.tech/documentation/concepts/filtering/) with rich
type support. This PR makes it possible to use those filters in LangChain
by passing an additional `filter` parameter to both the
`similarity_search_with_score` and `similarity_search` methods.

## Who can review?

@dev2049 @hwchase17

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
searx_updates
Kacper Łukawski 1 year ago committed by GitHub
parent f72bb966f8
commit 8bcaca435a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -399,6 +399,31 @@
"print(f\"\\nScore: {score}\")" "print(f\"\\nScore: {score}\")"
] ]
}, },
{
"cell_type": "markdown",
"source": [
"### Metadata filtering\n",
"\n",
"Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. It is also possible to use the filters in Langchain, by passing an additional param to both the `similarity_search_with_score` and `similarity_search` methods."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"```python\n",
"from qdrant_client.http import models as rest\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n",
"```"
],
"metadata": {
"collapsed": false
}
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "c58c30bf", "id": "c58c30bf",

@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING: if TYPE_CHECKING:
from qdrant_client.conversions import common_types
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
DictFilter = Dict[str, Union[str, int, bool, dict, list]]
MetadataFilter = Dict[str, Union[str, int, bool, dict, list]] MetadataFilter = Union[DictFilter, common_types.Filter]
class Qdrant(VectorStore): class Qdrant(VectorStore):
@ -234,10 +235,21 @@ class Qdrant(VectorStore):
List of Documents most similar to the query and score for each. List of Documents most similar to the query and score for each.
""" """
if filter is not None and isinstance(filter, dict):
warnings.warn(
"Using dict as a `filter` is deprecated. Please use qdrant-client "
"filters directly: "
"https://qdrant.tech/documentation/concepts/filtering/",
DeprecationWarning,
)
qdrant_filter = self._qdrant_filter_from_dict(filter)
else:
qdrant_filter = filter
results = self.client.search( results = self.client.search(
collection_name=self.collection_name, collection_name=self.collection_name,
query_vector=self._embed_query(query), query_vector=self._embed_query(query),
query_filter=self._qdrant_filter_from_dict(filter), query_filter=qdrant_filter,
with_payload=True, with_payload=True,
limit=k, limit=k,
) )
@ -519,7 +531,7 @@ class Qdrant(VectorStore):
return out return out
def _qdrant_filter_from_dict( def _qdrant_filter_from_dict(
self, filter: Optional[MetadataFilter] self, filter: Optional[DictFilter]
) -> Optional[rest.Filter]: ) -> Optional[rest.Filter]:
from qdrant_client.http import models as rest from qdrant_client.http import models as rest

@ -306,7 +306,7 @@ extended_testing = [
"html2text", "html2text",
"py-trello", "py-trello",
"scikit-learn", "scikit-learn",
"pyspark", "pyspark"
] ]
[tool.ruff] [tool.ruff]

@ -2,6 +2,7 @@
from typing import Callable, Optional from typing import Callable, Optional
import pytest import pytest
from qdrant_client.http import models as rest
from langchain.docstore.document import Document from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings from langchain.embeddings.base import Embeddings
@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
] ]
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Check that a native ``rest.Filter`` restricts similarity search results.

    Three texts are stored with a flat ``page`` value and a nested
    ``details`` payload each. The filter joins three ``must`` conditions
    (flat key, nested key, and list membership) that together are satisfied
    only by the metadata attached to ``"bar"``.
    """
    texts = ["foo", "bar", "baz"]

    # Build one payload per text; values are derived purely from the index.
    metadatas = []
    for i in range(len(texts)):
        nested = {"page": i + 1, "pages": [i + 2, -1]}
        metadatas.append({"page": i, "details": nested})

    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )

    # Each condition addresses the payload through the "metadata." prefix.
    page_is_one = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    nested_page_is_two = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    pages_contain_three = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    only_bar = rest.Filter(
        must=[page_is_one, nested_page_is_two, pages_contain_three]
    )

    found = store.similarity_search("foo", k=1, filter=only_bar)

    expected = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert found == [expected]
@pytest.mark.parametrize("batch_size", [1, 64]) @pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize( @pytest.mark.parametrize(
["content_payload_key", "metadata_payload_key"], ["content_payload_key", "metadata_payload_key"],

Loading…
Cancel
Save