diff --git a/docs/modules/indexes/vectorstores/examples/qdrant.ipynb b/docs/modules/indexes/vectorstores/examples/qdrant.ipynb index 7bd371ae..45273241 100644 --- a/docs/modules/indexes/vectorstores/examples/qdrant.ipynb +++ b/docs/modules/indexes/vectorstores/examples/qdrant.ipynb @@ -399,6 +399,31 @@ "print(f\"\\nScore: {score}\")" ] }, + { + "cell_type": "markdown", + "source": [ + "### Metadata filtering\n", + "\n", + "Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. These filters can also be used in LangChain by passing an additional `filter` parameter to both the `similarity_search_with_score` and `similarity_search` methods." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "```python\n", + "from qdrant_client.http import models as rest\n", + "\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n", + "```" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "markdown", "id": "c58c30bf", diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index 1ff1e061..6542a6bc 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore from langchain.vectorstores.utils import maximal_marginal_relevance if TYPE_CHECKING: + from qdrant_client.conversions import common_types from qdrant_client.http import models as rest - -MetadataFilter = Dict[str, Union[str, int, bool, dict, list]] + DictFilter = Dict[str, Union[str, int, bool, dict, list]] + MetadataFilter = Union[DictFilter, common_types.Filter] class Qdrant(VectorStore): @@ -234,10 +235,21 @@ class Qdrant(VectorStore): List of Documents most similar to the query and score for each. 
""" + if filter is not None and isinstance(filter, dict): + warnings.warn( + "Using dict as a `filter` is deprecated. Please use qdrant-client " + "filters directly: " + "https://qdrant.tech/documentation/concepts/filtering/", + DeprecationWarning, + ) + qdrant_filter = self._qdrant_filter_from_dict(filter) + else: + qdrant_filter = filter + results = self.client.search( collection_name=self.collection_name, query_vector=self._embed_query(query), - query_filter=self._qdrant_filter_from_dict(filter), + query_filter=qdrant_filter, with_payload=True, limit=k, ) @@ -519,7 +531,7 @@ class Qdrant(VectorStore): return out def _qdrant_filter_from_dict( - self, filter: Optional[MetadataFilter] + self, filter: Optional[DictFilter] ) -> Optional[rest.Filter]: from qdrant_client.http import models as rest diff --git a/pyproject.toml b/pyproject.toml index 95853b3a..82545433 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -306,7 +306,7 @@ extended_testing = [ "html2text", "py-trello", "scikit-learn", - "pyspark", + "pyspark" ] [tool.ruff] diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py index b7c8bca4..1f4db0aa 100644 --- a/tests/integration_tests/vectorstores/test_qdrant.py +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -2,6 +2,7 @@ from typing import Callable, Optional import pytest +from qdrant_client.http import models as rest from langchain.docstore.document import Document from langchain.embeddings.base import Embeddings @@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None: ] +def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [ + {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}} + for i in range(len(texts)) + ] + docsearch = Qdrant.from_texts( + texts, + ConsistentFakeEmbeddings(), + metadatas=metadatas, + 
location=":memory:", + ) + + qdrant_filter = rest.Filter( + must=[ + rest.FieldCondition( + key="metadata.page", + match=rest.MatchValue(value=1), + ), + rest.FieldCondition( + key="metadata.details.page", + match=rest.MatchValue(value=2), + ), + rest.FieldCondition( + key="metadata.details.pages", + match=rest.MatchAny(any=[3]), + ), + ] + ) + output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter) + assert output == [ + Document( + page_content="bar", + metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}}, + ) + ] + + @pytest.mark.parametrize("batch_size", [1, 64]) @pytest.mark.parametrize( ["content_payload_key", "metadata_payload_key"],