Feature: Qdrant filters support (#5446)

# Support Qdrant filters

Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. This PR makes it possible to use these filters in LangChain by passing an additional `filter` parameter to both the `similarity_search_with_score` and `similarity_search` methods.

## Who can review?

@dev2049 @hwchase17

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
1 year ago · 8bcaca435a
parent f72bb966f8
commit 8bcaca435a
4 changed files with 82 additions and 5 deletions
--- a/docs/modules/indexes/vectorstores/examples/qdrant.ipynb
+++ b/docs/modules/indexes/vectorstores/examples/qdrant.ipynb
@ -399,6 +399,31 @@
    "print(f\"\\nScore: {score}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Metadata filtering\n",
    "\n",
    "Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. It is also possible to use the filters in Langchain, by passing an additional param to both the `similarity_search_with_score` and `similarity_search` methods."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "```python\n",
    "from qdrant_client.http import models as rest\n",
    "\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n",
    "```"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "id": "c58c30bf",
--- a/langchain/vectorstores/qdrant.py
+++ b/langchain/vectorstores/qdrant.py
@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 if TYPE_CHECKING:
    from qdrant_client.conversions import common_types
    from qdrant_client.http import models as rest
-
+    DictFilter = Dict[str, Union[str, int, bool, dict, list]]
-MetadataFilter = Dict[str, Union[str, int, bool, dict, list]]
+    MetadataFilter = Union[DictFilter, common_types.Filter]
 class Qdrant(VectorStore):
@ -234,10 +235,21 @@ class Qdrant(VectorStore):
            List of Documents most similar to the query and score for each.
        """
        if filter is not None and isinstance(filter, dict):
            warnings.warn(
                "Using dict as a `filter` is deprecated. Please use qdrant-client "
                "filters directly: "
                "https://qdrant.tech/documentation/concepts/filtering/",
                DeprecationWarning,
            )
            qdrant_filter = self._qdrant_filter_from_dict(filter)
        else:
            qdrant_filter = filter
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=self._embed_query(query),
-            query_filter=self._qdrant_filter_from_dict(filter),
+            query_filter=qdrant_filter,
            with_payload=True,
            limit=k,
        )
@ -519,7 +531,7 @@ class Qdrant(VectorStore):
        return out
    def _qdrant_filter_from_dict(
-        self, filter: Optional[MetadataFilter]
+        self, filter: Optional[DictFilter]
    ) -> Optional[rest.Filter]:
        from qdrant_client.http import models as rest
--- a/pyproject.toml
+++ b/pyproject.toml
@ -306,7 +306,7 @@ extended_testing = [
 "html2text",
 "py-trello",
 "scikit-learn",
- "pyspark",
+ "pyspark"
 ]
 [tool.ruff]
--- a/tests/integration_tests/vectorstores/test_qdrant.py
+++ b/tests/integration_tests/vectorstores/test_qdrant.py
@ -2,6 +2,7 @@
 from typing import Callable, Optional
 import pytest
 from qdrant_client.http import models as rest
 from langchain.docstore.document import Document
 from langchain.embeddings.base import Embeddings
@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
    ]
 def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Search with a native qdrant-client ``rest.Filter`` instead of a dict.

    Three texts are stored (``location=":memory:"``) with nested metadata
    derived from their index. The filter's three ``must`` conditions are
    chosen so that only the document at index 1 ("bar") satisfies all of
    them, and that is the single result the search is expected to return.
    """
    texts = ["foo", "bar", "baz"]
    # Index i -> page=i, details.page=i+1, details.pages=[i+2, -1].
    metadatas = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(texts)
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )

    # Every condition below holds for index 1 only (page=1 already rules
    # out the other two documents).
    page_is_one = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    nested_page_is_two = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    nested_pages_has_three = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    qdrant_filter = rest.Filter(
        must=[page_is_one, nested_page_is_two, nested_pages_has_three]
    )

    output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)

    expected_doc = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert output == [expected_doc]
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize(
    ["content_payload_key", "metadata_payload_key"],