qdrant vector store - search with relevancy scores (#5781)

Implementation of similarity_search_with_relevance_scores for quadrant vector store. As implemented the method is also compatible with other capacities such as filtering. Integration tests updated. #### Who can review? Tag maintainers/contributors who might be interested: VectorStores / Retrievers / Memory - @dev2049
2023-06-07 22:26:40 -04:00 · 2023-06-07 22:26:40 -04:00 · 9355e3f5f5
commit 9355e3f5f5
parent f15763518a
2 changed files with 99 additions and 1 deletions
--- a/langchain/vectorstores/qdrant.py
+++ b/langchain/vectorstores/qdrant.py
@ -182,7 +182,11 @@ class Qdrant(VectorStore):
        return list(map(itemgetter(0), results))
    def similarity_search_with_score(
-        self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None
+        self,
        query: str,
        k: int = 4,
        filter: Optional[MetadataFilter] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query.
@ -224,6 +228,28 @@ class Qdrant(VectorStore):
            for result in results
        ]
    def _similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs and relevance scores in the range [0, 1].
        0 is dissimilar, 1 is most similar.
        Args:
            query: input text
            k: Number of Documents to return. Defaults to 4.
            **kwargs: kwargs to be passed to similarity search. Should include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs
        Returns:
            List of Tuples of (doc, similarity_score)
        """
        return self.similarity_search_with_score(query, k, **kwargs)
    def max_marginal_relevance_search(
        self,
        query: str,
--- a/tests/integration_tests/vectorstores/test_qdrant.py
+++ b/tests/integration_tests/vectorstores/test_qdrant.py
@ -131,6 +131,78 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
    ]
 def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )
    assert len(output) == 3
    for i in range(len(output)):
        assert round(output[i][1], 2) >= 0
        assert round(output[i][1], 2) <= 1
 def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )
    score_threshold = 0.98
    kwargs = {"score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])
 def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> (
    None
 ):
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )
    score_threshold = 0.99  # for almost exact match
    # test negative filter condition
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 0
    # test positive filter condition
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])
 def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]