forked from Archives/langchain
qdrant vector store - search with relevancy scores (#5781)
Implementation of similarity_search_with_relevance_scores for the Qdrant vector store. As implemented, the method is also compatible with other capabilities such as filtering. Integration tests updated. #### Who can review? Tag maintainers/contributors who might be interested: VectorStores / Retrievers / Memory - @dev2049
This commit is contained in:
parent
f15763518a
commit
9355e3f5f5
@ -182,7 +182,11 @@ class Qdrant(VectorStore):
|
|||||||
return list(map(itemgetter(0), results))
|
return list(map(itemgetter(0), results))
|
||||||
|
|
||||||
def similarity_search_with_score(
|
def similarity_search_with_score(
|
||||||
self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None
|
self,
|
||||||
|
query: str,
|
||||||
|
k: int = 4,
|
||||||
|
filter: Optional[MetadataFilter] = None,
|
||||||
|
**kwargs: Any,
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Return docs most similar to query.
|
"""Return docs most similar to query.
|
||||||
|
|
||||||
@ -224,6 +228,28 @@ class Qdrant(VectorStore):
|
|||||||
for result in results
|
for result in results
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _similarity_search_with_relevance_scores(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
k: int = 4,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> List[Tuple[Document, float]]:
|
||||||
|
"""Return docs and relevance scores in the range [0, 1].
|
||||||
|
|
||||||
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: input text
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
|
filter the resulting set of retrieved docs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Tuples of (doc, similarity_score)
|
||||||
|
"""
|
||||||
|
return self.similarity_search_with_score(query, k, **kwargs)
|
||||||
|
|
||||||
def max_marginal_relevance_search(
|
def max_marginal_relevance_search(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
|
@ -131,6 +131,78 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
metadatas = [
|
||||||
|
{"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||||
|
for i in range(len(texts))
|
||||||
|
]
|
||||||
|
docsearch = Qdrant.from_texts(
|
||||||
|
texts,
|
||||||
|
ConsistentFakeEmbeddings(),
|
||||||
|
metadatas=metadatas,
|
||||||
|
location=":memory:",
|
||||||
|
)
|
||||||
|
output = docsearch.similarity_search_with_relevance_scores(
|
||||||
|
"foo", k=3, score_threshold=None
|
||||||
|
)
|
||||||
|
assert len(output) == 3
|
||||||
|
for i in range(len(output)):
|
||||||
|
assert round(output[i][1], 2) >= 0
|
||||||
|
assert round(output[i][1], 2) <= 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
metadatas = [
|
||||||
|
{"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||||
|
for i in range(len(texts))
|
||||||
|
]
|
||||||
|
docsearch = Qdrant.from_texts(
|
||||||
|
texts,
|
||||||
|
ConsistentFakeEmbeddings(),
|
||||||
|
metadatas=metadatas,
|
||||||
|
location=":memory:",
|
||||||
|
)
|
||||||
|
|
||||||
|
score_threshold = 0.98
|
||||||
|
kwargs = {"score_threshold": score_threshold}
|
||||||
|
output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
|
||||||
|
assert len(output) == 1
|
||||||
|
assert all([score >= score_threshold for _, score in output])
|
||||||
|
|
||||||
|
|
||||||
|
def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> (
|
||||||
|
None
|
||||||
|
):
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
metadatas = [
|
||||||
|
{"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||||
|
for i in range(len(texts))
|
||||||
|
]
|
||||||
|
docsearch = Qdrant.from_texts(
|
||||||
|
texts,
|
||||||
|
ConsistentFakeEmbeddings(),
|
||||||
|
metadatas=metadatas,
|
||||||
|
location=":memory:",
|
||||||
|
)
|
||||||
|
score_threshold = 0.99 # for almost exact match
|
||||||
|
# test negative filter condition
|
||||||
|
negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
||||||
|
kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
|
||||||
|
output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
|
||||||
|
assert len(output) == 0
|
||||||
|
# test positive filter condition
|
||||||
|
positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
|
||||||
|
kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
|
||||||
|
output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
|
||||||
|
assert len(output) == 1
|
||||||
|
assert all([score >= score_threshold for _, score in output])
|
||||||
|
|
||||||
|
|
||||||
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
|
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
|
||||||
"""Test end to end construction and search."""
|
"""Test end to end construction and search."""
|
||||||
texts = ["foo", "bar", "baz"]
|
texts = ["foo", "bar", "baz"]
|
||||||
|
Loading…
Reference in New Issue
Block a user