Feature: Qdrant filters support (#5446)

# Support Qdrant filters

Qdrant has an [extensive filtering
system](https://qdrant.tech/documentation/concepts/filtering/) with rich
type support. This PR makes it possible to use those filters in LangChain
by passing an additional `filter` parameter to both the
`similarity_search_with_score` and `similarity_search` methods.

## Who can review?

@dev2049 @hwchase17

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
searx_updates
Kacper Łukawski 12 months ago committed by GitHub
parent f72bb966f8
commit 8bcaca435a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -399,6 +399,31 @@
"print(f\"\\nScore: {score}\")"
]
},
{
"cell_type": "markdown",
"source": [
"### Metadata filtering\n",
"\n",
"Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. It is also possible to use the filters in Langchain, by passing an additional param to both the `similarity_search_with_score` and `similarity_search` methods."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"```python\n",
"from qdrant_client.http import models as rest\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n",
"```"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"id": "c58c30bf",

@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:
from qdrant_client.conversions import common_types
from qdrant_client.http import models as rest
MetadataFilter = Dict[str, Union[str, int, bool, dict, list]]
DictFilter = Dict[str, Union[str, int, bool, dict, list]]
MetadataFilter = Union[DictFilter, common_types.Filter]
class Qdrant(VectorStore):
@ -234,10 +235,21 @@ class Qdrant(VectorStore):
List of Documents most similar to the query and score for each.
"""
if filter is not None and isinstance(filter, dict):
warnings.warn(
"Using dict as a `filter` is deprecated. Please use qdrant-client "
"filters directly: "
"https://qdrant.tech/documentation/concepts/filtering/",
DeprecationWarning,
)
qdrant_filter = self._qdrant_filter_from_dict(filter)
else:
qdrant_filter = filter
results = self.client.search(
collection_name=self.collection_name,
query_vector=self._embed_query(query),
query_filter=self._qdrant_filter_from_dict(filter),
query_filter=qdrant_filter,
with_payload=True,
limit=k,
)
@ -519,7 +531,7 @@ class Qdrant(VectorStore):
return out
def _qdrant_filter_from_dict(
self, filter: Optional[MetadataFilter]
self, filter: Optional[DictFilter]
) -> Optional[rest.Filter]:
from qdrant_client.http import models as rest

@ -306,7 +306,7 @@ extended_testing = [
"html2text",
"py-trello",
"scikit-learn",
"pyspark",
"pyspark"
]
[tool.ruff]

@ -2,6 +2,7 @@
from typing import Callable, Optional
import pytest
from qdrant_client.http import models as rest
from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
]
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Check that a native qdrant-client filter narrows similarity search."""
    texts = ["foo", "bar", "baz"]
    # Every document carries a flat field, a nested field and a nested list so
    # that each kind of payload key path can be targeted by the filter below.
    metadatas = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )

    # Only the document built with idx == 1 ("bar") satisfies all three
    # conditions at once.
    flat_page_match = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    nested_page_match = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    nested_pages_match = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    qdrant_filter = rest.Filter(
        must=[flat_page_match, nested_page_match, nested_pages_match]
    )

    results = store.similarity_search("foo", k=1, filter=qdrant_filter)

    expected = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert results == [expected]
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize(
["content_payload_key", "metadata_payload_key"],

Loading…
Cancel
Save