From 822cdb161b6631d8255e341f3321d8f0b9e28e8a Mon Sep 17 00:00:00 2001 From: Matthieu Date: Tue, 18 Apr 2023 05:22:39 +0100 Subject: [PATCH] Adding shared chromaDB client option (#2886) This pull request addresses the need to share a single `chromadb.Client` instance across multiple instances of the `Chroma` class. By implementing a shared client, we can maintain consistency and reduce resource usage when multiple instances of the `Chroma` class are created. This is especially relevant in a web app, where having multiple `Chroma` instances with a `persist_directory` leads to these clients not being synced. This PR implements this option while keeping the rest of the architecture unchanged. **Changes:** 1. Add an optional `client` parameter to the `Chroma` constructor to accept a shared `chromadb.Client` instance. 2. Modify the `from_documents` method and its text-based counterpart to accept an optional `client` parameter and pass it through. 3. Update the `Chroma` constructor to use the shared client if provided or create a new client if not provided. Let me know if anything needs to be modified - thanks again for your work on this incredible repo --- langchain/vectorstores/chroma.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index b7399bf6..7d29dbe5 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -60,6 +60,7 @@ class Chroma(VectorStore): persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None, collection_metadata: Optional[Dict] = None, + client: Optional[chromadb.Client] = None, ) -> None: """Initialize with Chroma client.""" try: @@ -71,15 +72,20 @@ class Chroma(VectorStore): "Please install it with `pip install chromadb`." 
) - if client_settings: - self._client_settings = client_settings + if client is not None: + self._client = client else: - self._client_settings = chromadb.config.Settings() - if persist_directory is not None: - self._client_settings = chromadb.config.Settings( - chroma_db_impl="duckdb+parquet", persist_directory=persist_directory - ) - self._client = chromadb.Client(self._client_settings) + if client_settings: + self._client_settings = client_settings + else: + self._client_settings = chromadb.config.Settings() + if persist_directory is not None: + self._client_settings = chromadb.config.Settings( + chroma_db_impl="duckdb+parquet", + persist_directory=persist_directory, + ) + self._client = chromadb.Client(self._client_settings) + self._embedding_function = embedding_function self._persist_directory = persist_directory self._collection = self._client.get_or_create_collection( @@ -279,6 +285,7 @@ class Chroma(VectorStore): collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None, + client: Optional[chromadb.Client] = None, **kwargs: Any, ) -> Chroma: """Create a Chroma vectorstore from a raw documents. @@ -303,6 +310,7 @@ class Chroma(VectorStore): embedding_function=embedding, persist_directory=persist_directory, client_settings=client_settings, + client=client, ) chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids) return chroma_collection @@ -316,6 +324,7 @@ class Chroma(VectorStore): collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None, + client: Optional[chromadb.Client] = None, # Add this line **kwargs: Any, ) -> Chroma: """Create a Chroma vectorstore from a list of documents. 
@@ -343,4 +352,5 @@ class Chroma(VectorStore): collection_name=collection_name, persist_directory=persist_directory, client_settings=client_settings, + client=client, )