diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py
index f1f16137..3114e76b 100644
--- a/langchain/vectorstores/qdrant.py
+++ b/langchain/vectorstores/qdrant.py
@@ -182,7 +182,11 @@ class Qdrant(VectorStore):
         return list(map(itemgetter(0), results))
 
     def similarity_search_with_score(
-        self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[MetadataFilter] = None,
+        **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Return docs most similar to query.
 
@@ -224,6 +228,28 @@ class Qdrant(VectorStore):
             for result in results
         ]
 
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores in the range [0, 1].
+
+        0 is dissimilar, 1 is most similar.
+
+        Args:
+            query: input text
+            k: Number of Documents to return. Defaults to 4.
+            **kwargs: kwargs to be passed to similarity search. May include:
+                score_threshold: Optional, a floating point value between 0 and 1 to
+                    filter the resulting set of retrieved docs
+
+        Returns:
+            List of Tuples of (doc, similarity_score)
+        """
+        return self.similarity_search_with_score(query, k, **kwargs)
+
     def max_marginal_relevance_search(
         self,
         query: str,
diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py
index 3dde3753..aec77cd0 100644
--- a/tests/integration_tests/vectorstores/test_qdrant.py
+++ b/tests/integration_tests/vectorstores/test_qdrant.py
@@ -131,6 +131,78 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
     ]
 
 
+def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None:
+    """Test relevance-score search returns all docs when no threshold is set."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [
+        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
+        for i in range(len(texts))
+    ]
+    docsearch = Qdrant.from_texts(
+        texts,
+        ConsistentFakeEmbeddings(),
+        metadatas=metadatas,
+        location=":memory:",
+    )
+    output = docsearch.similarity_search_with_relevance_scores(
+        "foo", k=3, score_threshold=None
+    )
+    assert len(output) == 3
+    for _, score in output:
+        assert round(score, 2) >= 0
+        assert round(score, 2) <= 1
+
+
+def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None:
+    """Test relevance-score search keeps only docs at or above the threshold."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [
+        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
+        for i in range(len(texts))
+    ]
+    docsearch = Qdrant.from_texts(
+        texts,
+        ConsistentFakeEmbeddings(),
+        metadatas=metadatas,
+        location=":memory:",
+    )
+
+    score_threshold = 0.98
+    kwargs = {"score_threshold": score_threshold}
+    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
+    assert len(output) == 1
+    assert all(score >= score_threshold for _, score in output)
+
+
+def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> (
+    None
+):
+    """Test relevance-score search with both a metadata filter and a threshold."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [
+        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
+        for i in range(len(texts))
+    ]
+    docsearch = Qdrant.from_texts(
+        texts,
+        ConsistentFakeEmbeddings(),
+        metadatas=metadatas,
+        location=":memory:",
+    )
+    score_threshold = 0.99  # for almost exact match
+    # test negative filter condition
+    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
+    kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
+    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
+    assert len(output) == 0
+    # test positive filter condition
+    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
+    kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
+    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
+    assert len(output) == 1
+    assert all(score >= score_threshold for _, score in output)
+
+
 def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
     """Test end to end construction and search."""
     texts = ["foo", "bar", "baz"]