Chroma: add vector search with scores (#6864)

- Description: Adds to the Chroma integration the option to run a
similarity search by vector with relevance scores. Also fixes two minor
typos.
  
  - Issue: The "lambda_mult" typo is related to #4861 
  
  - Maintainer: @rlancemartin, @eyurtsev
pull/7122/head^2
Jan Kubica 1 year ago committed by GitHub
parent 576880abc5
commit fed64ae060
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -181,7 +181,7 @@ class Chroma(VectorStore):
) -> List[Document]:
"""Return docs most similar to embedding vector.
Args:
embedding (str): Embedding to look up documents similar to.
embedding (List[float]): Embedding to look up documents similar to.
k (int): Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
Returns:
@ -192,6 +192,31 @@ class Chroma(VectorStore):
)
return _results_to_docs(results)
def similarity_search_by_vector_with_relevance_scores(
    self,
    embedding: List[float],
    k: int = DEFAULT_K,
    filter: Optional[Dict[str, str]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Look up the nearest documents for an embedding vector, with scores.

    Args:
        embedding (List[float]): Embedding to look up documents similar to.
        k (int): Number of Documents to return. Defaults to DEFAULT_K.
        filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
        **kwargs (Any): Accepted for signature compatibility; not used here.

    Returns:
        List[Tuple[Document, float]]: Documents most similar to the given
        embedding, each paired with a float score. The score is whatever
        distance the collection query reports (described upstream as
        cosine distance -- confirm against the collection's configuration).
        Lower score represents more similarity.
    """
    # Query the collection with the raw vector and convert the response
    # straight into (Document, score) pairs.
    return _results_to_docs_and_scores(
        self.__query_collection(
            query_embeddings=embedding,
            n_results=k,
            where=filter,
        )
    )
def similarity_search_with_score(
self,
query: str,
@ -309,7 +334,7 @@ class Chroma(VectorStore):
embedding = self._embedding_function.embed_query(query)
docs = self.max_marginal_relevance_search_by_vector(
embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter
embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
)
return docs

@ -58,6 +58,25 @@ def test_chroma_with_metadatas_with_scores() -> None:
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
def test_chroma_with_metadatas_with_scores_using_vector() -> None:
    """Build a store from texts, then run a scored search by embedding vector."""
    fake_embedder = FakeEmbeddings()
    corpus = ["foo", "bar", "baz"]
    # One metadata dict per text, tagging it with its position as a string.
    page_tags = [{"page": str(position)} for position, _ in enumerate(corpus)]

    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=corpus,
        embedding=fake_embedder,
        metadatas=page_tags,
    )

    # Embed the query up front so the search runs on the vector itself.
    query_vector = fake_embedder.embed_query("foo")
    hits = store.similarity_search_by_vector_with_relevance_scores(
        embedding=query_vector,
        k=1,
    )

    expected = [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
    assert hits == expected
def test_chroma_search_filter() -> None:
"""Test end to end construction and search with metadata filtering."""
texts = ["far", "bar", "baz"]

Loading…
Cancel
Save