add where_document filter for chroma (#10214)

- Description: add where_document filter parameter in Chroma
- Issue: [10082](https://github.com/langchain-ai/langchain/issues/10082)
  - Dependencies: no
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
  - Twitter handle: no

@hwchase17

---------

Co-authored-by: Jeremy Lai <jeremy_lai@wiwynn.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/10291/head
Jeremy Lai 12 months ago committed by GitHub
parent 7203c97e8f
commit e93240f023
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -142,6 +142,7 @@ class Chroma(VectorStore):
query_embeddings: Optional[List[List[float]]] = None,
n_results: int = 4,
where: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Document]:
"""Query the chroma collection."""
@ -157,6 +158,7 @@ class Chroma(VectorStore):
query_embeddings=query_embeddings,
n_results=n_results,
where=where,
where_document=where_document,
**kwargs,
)
@ -264,6 +266,7 @@ class Chroma(VectorStore):
embedding: List[float],
k: int = DEFAULT_K,
filter: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Document]:
"""Return docs most similar to embedding vector.
@ -275,7 +278,10 @@ class Chroma(VectorStore):
List of Documents most similar to the query vector.
"""
results = self.__query_collection(
query_embeddings=embedding, n_results=k, where=filter
query_embeddings=embedding,
n_results=k,
where=filter,
where_document=where_document,
)
return _results_to_docs(results)
@ -284,6 +290,7 @@ class Chroma(VectorStore):
embedding: List[float],
k: int = DEFAULT_K,
filter: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""
@ -300,7 +307,10 @@ class Chroma(VectorStore):
Lower score represents more similarity.
"""
results = self.__query_collection(
query_embeddings=embedding, n_results=k, where=filter
query_embeddings=embedding,
n_results=k,
where=filter,
where_document=where_document,
)
return _results_to_docs_and_scores(results)
@ -309,6 +319,7 @@ class Chroma(VectorStore):
query: str,
k: int = DEFAULT_K,
filter: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Run similarity search with Chroma with distance.
@ -325,12 +336,18 @@ class Chroma(VectorStore):
"""
if self._embedding_function is None:
results = self.__query_collection(
query_texts=[query], n_results=k, where=filter
query_texts=[query],
n_results=k,
where=filter,
where_document=where_document,
)
else:
query_embedding = self._embedding_function.embed_query(query)
results = self.__query_collection(
query_embeddings=[query_embedding], n_results=k, where=filter
query_embeddings=[query_embedding],
n_results=k,
where=filter,
where_document=where_document,
)
return _results_to_docs_and_scores(results)
@ -374,6 +391,7 @@ class Chroma(VectorStore):
fetch_k: int = 20,
lambda_mult: float = 0.5,
filter: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
@ -398,6 +416,7 @@ class Chroma(VectorStore):
query_embeddings=embedding,
n_results=fetch_k,
where=filter,
where_document=where_document,
include=["metadatas", "documents", "distances", "embeddings"],
)
mmr_selected = maximal_marginal_relevance(
@ -419,6 +438,7 @@ class Chroma(VectorStore):
fetch_k: int = 20,
lambda_mult: float = 0.5,
filter: Optional[Dict[str, str]] = None,
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
@ -445,7 +465,12 @@ class Chroma(VectorStore):
embedding = self._embedding_function.embed_query(query)
docs = self.max_marginal_relevance_search_by_vector(
embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
embedding,
k,
fetch_k,
lambda_mult=lambda_mult,
filter=filter,
where_document=where_document,
)
return docs
@ -472,7 +497,7 @@ class Chroma(VectorStore):
offset: The offset to start returning results from.
Useful for paging results with limit. Optional.
where_document: A WhereDocument type dict used to filter by the documents.
E.g. `{$contains: {"text": "hello"}}`. Optional.
E.g. `{$contains: "hello"}`. Optional.
include: A list of what to include in the results.
Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
Ids are always included.

Loading…
Cancel
Save