diff --git a/libs/community/langchain_community/vectorstores/documentdb.py b/libs/community/langchain_community/vectorstores/documentdb.py index 847740d2e4..d56f1a5712 100644 --- a/libs/community/langchain_community/vectorstores/documentdb.py +++ b/libs/community/langchain_community/vectorstores/documentdb.py @@ -175,6 +175,10 @@ class DocumentDBVectorSearch(VectorStore): The maximum number of supported dimensions is 2000 similarity: Similarity algorithm to use with the HNSW index. + Possible options are: + - DocumentDBSimilarityType.COS (cosine distance), + - DocumentDBSimilarityType.EUC (Euclidean distance), and + - DocumentDBSimilarityType.DOT (dot product). m: Specifies the max number of connections for an HNSW index. Large impact on memory consumption. @@ -183,10 +187,6 @@ class DocumentDBVectorSearch(VectorStore): for constructing the graph for HNSW index. Higher values lead to more accurate results but slower indexing speed. - Possible options are: - - DocumentDBSimilarityType.COS (cosine distance), - - DocumentDBSimilarityType.EUC (Euclidean distance), and - - DocumentDBSimilarityType.DOT (dot product). Returns: An object describing the created index @@ -309,7 +309,11 @@ class DocumentDBVectorSearch(VectorStore): self._collection.delete_one({"_id": ObjectId(document_id)}) def _similarity_search_without_score( - self, embeddings: List[float], k: int = 4, ef_search: int = 40 + self, + embeddings: List[float], + k: int = 4, + ef_search: int = 40, + filter: Optional[Dict[str, Any]] = None, ) -> List[Document]: """Returns a list of documents. @@ -319,12 +323,13 @@ class DocumentDBVectorSearch(VectorStore): ef_search: Specifies the size of the dynamic candidate list that HNSW index uses during search. A higher value of efSearch provides better recall at cost of speed. - + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. Returns: A list of documents closest to the query vector """ pipeline: List[dict[str, Any]] = [ { + "$match": filter, "$search": { "vectorSearch": { "vector": embeddings, @@ -333,7 +338,7 @@ class DocumentDBVectorSearch(VectorStore): "k": k, "efSearch": ef_search, } - } + }, } ] @@ -352,10 +357,12 @@ class DocumentDBVectorSearch(VectorStore): query: str, k: int = 4, ef_search: int = 40, + *, + filter: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> List[Document]: embeddings = self._embedding.embed_query(query) docs = self._similarity_search_without_score( - embeddings=embeddings, k=k, ef_search=ef_search + embeddings=embeddings, k=k, ef_search=ef_search, filter=filter ) return [doc for doc in docs]