From f93c0114566f25ef5dbf3e397d994a82cfe7030f Mon Sep 17 00:00:00 2001 From: Jeff Huber Date: Thu, 16 Mar 2023 12:06:47 -0700 Subject: [PATCH] fallback to {} for None metadata from Chroma (#1714) The basic vector store example started breaking because `Document` required `not None` for metadata, but Chroma stores metadata as `None` if none is provided. This creates a fallback which fixes the basic tutorial https://langchain.readthedocs.io/en/latest/modules/indexes/examples/vectorstores.html Here is the error that was generated: ``` Running Chroma using direct local API. Using DuckDB in-memory for database. Data will be transient. Traceback (most recent call last): File "/Users/jeff/src/temp/langchainchroma/test.py", line 17, in <module> docs = docsearch.similarity_search(query) File "/Users/jeff/src/langchain/langchain/vectorstores/chroma.py", line 133, in similarity_search docs_and_scores = self.similarity_search_with_score(query, k) File "/Users/jeff/src/langchain/langchain/vectorstores/chroma.py", line 182, in similarity_search_with_score return _results_to_docs_and_scores(results) File "/Users/jeff/src/langchain/langchain/vectorstores/chroma.py", line 24, in _results_to_docs_and_scores return [ File "/Users/jeff/src/langchain/langchain/vectorstores/chroma.py", line 27, in <listcomp> (Document(page_content=result[0], metadata=result[1]), result[2]) File "pydantic/main.py", line 331, in pydantic.main.BaseModel.__init__ pydantic.error_wrappers.ValidationError: 1 validation error for Document metadata none is not an allowed value (type=type_error.none.not_allowed) Exiting: Cleaning up .chroma directory ``` --- langchain/vectorstores/chroma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index d4a8d63d..2f2b49db 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -24,7 +24,7 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]: return [ 
# TODO: Chroma can do batch querying, # we shouldn't hard code to the 1st result - (Document(page_content=result[0], metadata=result[1]), result[2]) + (Document(page_content=result[0], metadata=result[1] or {}), result[2]) for result in zip( results["documents"][0], results["metadatas"][0],