mirror of https://github.com/hwchase17/langchain
chroma: Add chroma partner package (#19292)
**Description:** Adds chroma to the partners package. Tests & code mirror those in the community package.
**Dependencies:** None
**Twitter handle:** @akiradev0x

---------

Co-authored-by: Erick Friis <erick@langchain.dev>

parent eef19954f3
commit f8a54d1d73
@@ -0,0 +1,2 @@
__pycache__
*/persist_dir

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,57 @@
.PHONY: all format lint test tests integration_tests docker_tests help extended_tests

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
integration_test integration_tests: TEST_FILE = tests/integration_tests/

test tests integration_test integration_tests:
	poetry run pytest $(TEST_FILE)


######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/chroma --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_chroma
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	poetry run ruff .
	poetry run ruff format $(PYTHON_FILES) --diff
	poetry run ruff --select I $(PYTHON_FILES)
	mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	poetry run ruff format $(PYTHON_FILES)
	poetry run ruff --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

spell_fix:
	poetry run codespell --toml pyproject.toml -w

check_imports: $(shell find langchain_chroma -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports - check imports'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
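
For orientation, a typical local workflow with these targets might look like the following. This is a sketch: it assumes Poetry is installed and uses the optional dependency groups declared in this package's pyproject.toml.

```bash
# install the package plus the optional test/lint/typing groups
poetry install --with test,lint,typing

make test     # unit tests (tests/unit_tests/)
make lint     # ruff checks, format diff, mypy
make test TEST_FILE=tests/unit_tests/test_imports.py   # a single test file
```
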
@@ -0,0 +1,21 @@
# langchain-chroma

This package contains the LangChain integration with Chroma.

## Installation

```bash
pip install -U langchain-chroma
```

## Usage

The `Chroma` class exposes the connection to the Chroma vector store.

```python
from langchain_chroma import Chroma

embeddings = ...  # use a LangChain Embeddings class

vectorstore = Chroma(embedding_function=embeddings)
```
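
For quick experimentation without a real embedding model, the fake embeddings shipped with `langchain_core` (which this package's own tests use) can stand in. A minimal end-to-end sketch:

```python
from langchain_core.embeddings.fake import FakeEmbeddings

from langchain_chroma import Chroma

# Build an in-memory collection from raw texts (no persist_directory given).
vectorstore = Chroma.from_texts(
    collection_name="demo_collection",
    texts=["foo", "bar", "baz"],
    embedding=FakeEmbeddings(size=10),
)

# Retrieve the single most similar document.
docs = vectorstore.similarity_search("foo", k=1)
print(docs[0].page_content)

# Drop the collection when done.
vectorstore.delete_collection()
```
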
@@ -0,0 +1,5 @@
from langchain_chroma.vectorstores import Chroma

__all__ = [
    "Chroma",
]

@@ -0,0 +1,810 @@
from __future__ import annotations

import base64
import logging
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
    Type,
    Union,
)

import chromadb
import chromadb.config
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import xor_args
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    from chromadb.api.types import ID, OneOrMany, Where, WhereDocument

logger = logging.getLogger()
DEFAULT_K = 4  # Number of Documents to return.


def _results_to_docs(results: Any) -> List[Document]:
    return [doc for doc, _ in _results_to_docs_and_scores(results)]


def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
    return [
        # TODO: Chroma can do batch querying,
        # we shouldn't hard code to the 1st result
        (Document(page_content=result[0], metadata=result[1] or {}), result[2])
        for result in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0],
        )
    ]


Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]


def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices."""
    if len(X) == 0 or len(Y) == 0:
        return np.array([])

    X = np.array(X)
    Y = np.array(Y)
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            f"Number of columns in X and Y must be the same. X has shape {X.shape} "
            f"and Y has shape {Y.shape}."
        )

    X_norm = np.linalg.norm(X, axis=1)
    Y_norm = np.linalg.norm(Y, axis=1)
    # Ignore divide-by-zero runtime warnings; those cases are handled below.
    with np.errstate(divide="ignore", invalid="ignore"):
        similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
        similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
    return similarity


def maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Calculate maximal marginal relevance."""
    if min(k, len(embedding_list)) <= 0:
        return []
    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)
    similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0]
    most_similar = int(np.argmax(similarity_to_query))
    idxs = [most_similar]
    selected = np.array([embedding_list[most_similar]])
    while len(idxs) < min(k, len(embedding_list)):
        best_score = -np.inf
        idx_to_add = -1
        similarity_to_selected = cosine_similarity(embedding_list, selected)
        for i, query_score in enumerate(similarity_to_query):
            if i in idxs:
                continue
            redundant_score = max(similarity_to_selected[i])
            equation_score = (
                lambda_mult * query_score - (1 - lambda_mult) * redundant_score
            )
            if equation_score > best_score:
                best_score = equation_score
                idx_to_add = i
        idxs.append(idx_to_add)
        selected = np.append(selected, [embedding_list[idx_to_add]], axis=0)
    return idxs

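A quick worked example of the selection loop above may help; this is an illustrative sketch with made-up 2-D vectors, assuming `maximal_marginal_relevance` from this module is in scope:

```python
import numpy as np

# Two near-duplicate vectors (indices 0 and 1) and one orthogonal vector (index 2).
candidates = [
    np.array([1.0, 0.0]),
    np.array([0.99, 0.01]),
    np.array([0.0, 1.0]),
]
query = np.array([1.0, 0.1])

# The first pick is the candidate closest to the query (index 1, marginally).
# With lambda_mult=0.5 the second pick penalizes redundancy, so the orthogonal
# vector at index 2 beats the near-duplicate at index 0.
print(maximal_marginal_relevance(query, candidates, lambda_mult=0.5, k=2))  # [1, 2]
```
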
class Chroma(VectorStore):
    """`ChromaDB` vector store.

    To use, you should have the ``chromadb`` python package installed.

    Example:
        .. code-block:: python

                from langchain_chroma import Chroma
                from langchain_community.embeddings.openai import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                vectorstore = Chroma("langchain_store", embeddings)
    """

    _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain"

    def __init__(
        self,
        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
        embedding_function: Optional[Embeddings] = None,
        persist_directory: Optional[str] = None,
        client_settings: Optional[chromadb.config.Settings] = None,
        collection_metadata: Optional[Dict] = None,
        client: Optional[chromadb.ClientAPI] = None,
        relevance_score_fn: Optional[Callable[[float], float]] = None,
    ) -> None:
        """Initialize with a Chroma client."""
        if client is not None:
            self._client_settings = client_settings
            self._client = client
            self._persist_directory = persist_directory
        else:
            if client_settings:
                # If client_settings is provided with persist_directory specified,
                # then it is "in-memory and persisting to disk" mode.
                client_settings.persist_directory = (
                    persist_directory or client_settings.persist_directory
                )

                _client_settings = client_settings
            elif persist_directory:
                _client_settings = chromadb.config.Settings(is_persistent=True)
                _client_settings.persist_directory = persist_directory
            else:
                _client_settings = chromadb.config.Settings()
            self._client_settings = _client_settings
            self._client = chromadb.Client(_client_settings)
            self._persist_directory = (
                _client_settings.persist_directory or persist_directory
            )

        self._embedding_function = embedding_function
        self._collection = self._client.get_or_create_collection(
            name=collection_name,
            embedding_function=None,
            metadata=collection_metadata,
        )
        self.override_relevance_score_fn = relevance_score_fn

    @property
    def embeddings(self) -> Optional[Embeddings]:
        return self._embedding_function

    @xor_args(("query_texts", "query_embeddings"))
    def __query_collection(
        self,
        query_texts: Optional[List[str]] = None,
        query_embeddings: Optional[List[List[float]]] = None,
        n_results: int = 4,
        where: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> Union[List[Document], chromadb.QueryResult]:
        """Query the Chroma collection."""
        return self._collection.query(
            query_texts=query_texts,
            query_embeddings=query_embeddings,  # type: ignore
            n_results=n_results,
            where=where,  # type: ignore
            where_document=where_document,  # type: ignore
            **kwargs,
        )

    def encode_image(self, uri: str) -> str:
        """Get base64 string from image URI."""
        with open(uri, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def add_images(
        self,
        uris: List[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more images through the embeddings and add to the vectorstore.

        Args:
            uris (List[str]): File paths to the images.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added images.
        """
        # Map from uris to b64 encoded strings
        b64_texts = [self.encode_image(uri=uri) for uri in uris]
        # Populate IDs
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in uris]
        embeddings = None
        # Set embeddings
        if self._embedding_function is not None and hasattr(
            self._embedding_function, "embed_image"
        ):
            embeddings = self._embedding_function.embed_image(uris=uris)
        if metadatas:
            # fill metadatas with empty dicts if somebody
            # did not specify metadata for all images
            length_diff = len(uris) - len(metadatas)
            if length_diff:
                metadatas = metadatas + [{}] * length_diff
            empty_ids = []
            non_empty_ids = []
            for idx, m in enumerate(metadatas):
                if m:
                    non_empty_ids.append(idx)
                else:
                    empty_ids.append(idx)
            if non_empty_ids:
                metadatas = [metadatas[idx] for idx in non_empty_ids]
                images_with_metadatas = [b64_texts[idx] for idx in non_empty_ids]
                embeddings_with_metadatas = (
                    [embeddings[idx] for idx in non_empty_ids] if embeddings else None
                )
                ids_with_metadata = [ids[idx] for idx in non_empty_ids]
                try:
                    self._collection.upsert(
                        metadatas=metadatas,  # type: ignore
                        embeddings=embeddings_with_metadatas,  # type: ignore
                        documents=images_with_metadatas,
                        ids=ids_with_metadata,
                    )
                except ValueError as e:
                    if "Expected metadata value to be" in str(e):
                        msg = (
                            "Try filtering complex metadata using "
                            "langchain_community.vectorstores.utils.filter_complex_metadata."
                        )
                        raise ValueError(e.args[0] + "\n\n" + msg)
                    else:
                        raise e
            if empty_ids:
                images_without_metadatas = [b64_texts[j] for j in empty_ids]
                embeddings_without_metadatas = (
                    [embeddings[j] for j in empty_ids] if embeddings else None
                )
                ids_without_metadatas = [ids[j] for j in empty_ids]
                self._collection.upsert(
                    embeddings=embeddings_without_metadatas,
                    documents=images_without_metadatas,
                    ids=ids_without_metadatas,
                )
        else:
            self._collection.upsert(
                embeddings=embeddings,
                documents=b64_texts,
                ids=ids,
            )
        return ids

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts (Iterable[str]): Texts to add to the vectorstore.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added texts.
        """
        # TODO: Handle the case where the user doesn't provide ids on the Collection
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        embeddings = None
        texts = list(texts)
        if self._embedding_function is not None:
            embeddings = self._embedding_function.embed_documents(texts)
        if metadatas:
            # fill metadatas with empty dicts if somebody
            # did not specify metadata for all texts
            length_diff = len(texts) - len(metadatas)
            if length_diff:
                metadatas = metadatas + [{}] * length_diff
            empty_ids = []
            non_empty_ids = []
            for idx, m in enumerate(metadatas):
                if m:
                    non_empty_ids.append(idx)
                else:
                    empty_ids.append(idx)
            if non_empty_ids:
                metadatas = [metadatas[idx] for idx in non_empty_ids]
                texts_with_metadatas = [texts[idx] for idx in non_empty_ids]
                embeddings_with_metadatas = (
                    [embeddings[idx] for idx in non_empty_ids] if embeddings else None
                )
                ids_with_metadata = [ids[idx] for idx in non_empty_ids]
                try:
                    self._collection.upsert(
                        metadatas=metadatas,  # type: ignore
                        embeddings=embeddings_with_metadatas,  # type: ignore
                        documents=texts_with_metadatas,
                        ids=ids_with_metadata,
                    )
                except ValueError as e:
                    if "Expected metadata value to be" in str(e):
                        msg = (
                            "Try filtering complex metadata from the document using "
                            "langchain_community.vectorstores.utils.filter_complex_metadata."
                        )
                        raise ValueError(e.args[0] + "\n\n" + msg)
                    else:
                        raise e
            if empty_ids:
                texts_without_metadatas = [texts[j] for j in empty_ids]
                embeddings_without_metadatas = (
                    [embeddings[j] for j in empty_ids] if embeddings else None
                )
                ids_without_metadatas = [ids[j] for j in empty_ids]
                self._collection.upsert(
                    embeddings=embeddings_without_metadatas,  # type: ignore
                    documents=texts_without_metadatas,
                    ids=ids_without_metadatas,
                )
        else:
            self._collection.upsert(
                embeddings=embeddings,  # type: ignore
                documents=texts,
                ids=ids,
            )
        return ids

    def similarity_search(
        self,
        query: str,
        k: int = DEFAULT_K,
        filter: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Run similarity search with Chroma.

        Args:
            query (str): Query text to search for.
            k (int): Number of results to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List[Document]: List of documents most similar to the query text.
        """
        docs_and_scores = self.similarity_search_with_score(
            query, k, filter=filter, **kwargs
        )
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = DEFAULT_K,
        filter: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to embedding vector.

        Args:
            embedding (List[float]): Embedding to look up documents similar to.
            k (int): Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents most similar to the query vector.
        """
        results = self.__query_collection(
            query_embeddings=embedding,
            n_results=k,
            where=filter,
            where_document=where_document,
            **kwargs,
        )
        return _results_to_docs(results)

    def similarity_search_by_vector_with_relevance_scores(
        self,
        embedding: List[float],
        k: int = DEFAULT_K,
        filter: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """
        Return docs most similar to embedding vector and similarity score.

        Args:
            embedding (List[float]): Embedding to look up documents similar to.
            k (int): Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List[Tuple[Document, float]]: List of documents most similar to
            the query text and cosine distance in float for each.
            Lower score represents more similarity.
        """
        results = self.__query_collection(
            query_embeddings=embedding,
            n_results=k,
            where=filter,
            where_document=where_document,
            **kwargs,
        )
        return _results_to_docs_and_scores(results)

    def similarity_search_with_score(
        self,
        query: str,
        k: int = DEFAULT_K,
        filter: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Run similarity search with Chroma with distance.

        Args:
            query (str): Query text to search for.
            k (int): Number of results to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List[Tuple[Document, float]]: List of documents most similar to
            the query text and cosine distance in float for each.
            Lower score represents more similarity.
        """
        if self._embedding_function is None:
            results = self.__query_collection(
                query_texts=[query],
                n_results=k,
                where=filter,
                where_document=where_document,
                **kwargs,
            )
        else:
            query_embedding = self._embedding_function.embed_query(query)
            results = self.__query_collection(
                query_embeddings=[query_embedding],
                n_results=k,
                where=filter,
                where_document=where_document,
                **kwargs,
            )

        return _results_to_docs_and_scores(results)

    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        """
        if self.override_relevance_score_fn:
            return self.override_relevance_score_fn

        distance = "l2"
        distance_key = "hnsw:space"
        metadata = self._collection.metadata

        if metadata and distance_key in metadata:
            distance = metadata[distance_key]

        if distance == "cosine":
            return self._cosine_relevance_score_fn
        elif distance == "l2":
            return self._euclidean_relevance_score_fn
        elif distance == "ip":
            return self._max_inner_product_relevance_score_fn
        else:
            raise ValueError(
                "No supported normalization function"
                f" for distance metric of type: {distance}. "
                "Consider providing relevance_score_fn to Chroma constructor."
            )

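Since the score function is looked up from the collection's `hnsw:space` metadata, the distance metric can be pinned at construction time. A minimal sketch (the collection name is a placeholder and `embeddings` stands for any LangChain `Embeddings` instance):

```python
# Collections default to "l2"; passing collection_metadata switches the
# metric, which in turn selects the cosine relevance function above.
vectorstore = Chroma(
    collection_name="my_cosine_collection",  # hypothetical name
    embedding_function=embeddings,
    collection_metadata={"hnsw:space": "cosine"},
)
scored = vectorstore.similarity_search_with_relevance_scores("some query", k=4)
```
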
    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = DEFAULT_K,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        results = self.__query_collection(
            query_embeddings=embedding,
            n_results=fetch_k,
            where=filter,
            where_document=where_document,
            include=["metadatas", "documents", "distances", "embeddings"],
            **kwargs,
        )
        mmr_selected = maximal_marginal_relevance(
            np.array(embedding, dtype=np.float32),
            results["embeddings"][0],
            k=k,
            lambda_mult=lambda_mult,
        )

        candidates = _results_to_docs(results)

        selected_results = [r for i, r in enumerate(candidates) if i in mmr_selected]
        return selected_results

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = DEFAULT_K,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, str]] = None,
        where_document: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        if self._embedding_function is None:
            raise ValueError(
                "For MMR search, you must specify an embedding function on creation."
            )

        embedding = self._embedding_function.embed_query(query)
        docs = self.max_marginal_relevance_search_by_vector(
            embedding,
            k,
            fetch_k,
            lambda_mult=lambda_mult,
            filter=filter,
            where_document=where_document,
        )
        return docs

    def delete_collection(self) -> None:
        """Delete the collection."""
        self._client.delete_collection(self._collection.name)

    def get(
        self,
        ids: Optional[OneOrMany[ID]] = None,
        where: Optional[Where] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where_document: Optional[WhereDocument] = None,
        include: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Gets items from the collection.

        Args:
            ids: The ids of the embeddings to get. Optional.
            where: A Where type dict used to filter results by.
                E.g. `{"color" : "red", "price": 4.20}`. Optional.
            limit: The number of documents to return. Optional.
            offset: The offset to start returning results from.
                Useful for paging results with limit. Optional.
            where_document: A WhereDocument type dict used to filter by the documents.
                E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results.
                Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
                Ids are always included.
                Defaults to `["metadatas", "documents"]`. Optional.
        """
        kwargs = {
            "ids": ids,
            "where": where,
            "limit": limit,
            "offset": offset,
            "where_document": where_document,
        }

        if include is not None:
            kwargs["include"] = include

        return self._collection.get(**kwargs)  # type: ignore

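As a usage sketch, a metadata-filtered fetch without any similarity query (the `page` key mirrors the metadata used in this package's tests; `vectorstore` is an existing `Chroma` instance):

```python
records = vectorstore.get(
    where={"page": "0"},                 # metadata filter
    include=["metadatas", "documents"],  # ids are always returned
)
print(records["ids"], records["documents"])
```
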
    def update_document(self, document_id: str, document: Document) -> None:
        """Update a document in the collection.

        Args:
            document_id (str): ID of the document to update.
            document (Document): Document to update.
        """
        return self.update_documents([document_id], [document])

    def update_documents(self, ids: List[str], documents: List[Document]) -> None:  # type: ignore
        """Update documents in the collection.

        Args:
            ids (List[str]): List of ids of the documents to update.
            documents (List[Document]): List of documents to update.
        """
        text = [document.page_content for document in documents]
        metadata = [document.metadata for document in documents]
        if self._embedding_function is None:
            raise ValueError(
                "For update, you must specify an embedding function on creation."
            )
        embeddings = self._embedding_function.embed_documents(text)

        if hasattr(
            self._collection._client, "max_batch_size"
        ):  # for Chroma 0.4.10 and above
            from chromadb.utils.batch_utils import create_batches

            for batch in create_batches(
                api=self._collection._client,
                ids=ids,
                metadatas=metadata,  # type: ignore
                documents=text,
                embeddings=embeddings,  # type: ignore
            ):
                self._collection.update(
                    ids=batch[0],
                    embeddings=batch[1],
                    documents=batch[3],
                    metadatas=batch[2],
                )
        else:
            self._collection.update(
                ids=ids,
                embeddings=embeddings,  # type: ignore
                documents=text,
                metadatas=metadata,  # type: ignore
            )

    @classmethod
    def from_texts(
        cls: Type[Chroma],
        texts: List[str],
        embedding: Optional[Embeddings] = None,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
        persist_directory: Optional[str] = None,
        client_settings: Optional[chromadb.config.Settings] = None,
        client: Optional[chromadb.ClientAPI] = None,
        collection_metadata: Optional[Dict] = None,
        **kwargs: Any,
    ) -> Chroma:
        """Create a Chroma vectorstore from a list of raw texts.

        If a persist_directory is specified, the collection will be persisted there.
        Otherwise, the data will be ephemeral in-memory.

        Args:
            texts (List[str]): List of texts to add to the collection.
            collection_name (str): Name of the collection to create.
            persist_directory (Optional[str]): Directory to persist the collection.
            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
            metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
            ids (Optional[List[str]]): List of document IDs. Defaults to None.
            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
            collection_metadata (Optional[Dict]): Collection configurations.
                Defaults to None.

        Returns:
            Chroma: Chroma vectorstore.
        """
        chroma_collection = cls(
            collection_name=collection_name,
            embedding_function=embedding,
            persist_directory=persist_directory,
            client_settings=client_settings,
            client=client,
            collection_metadata=collection_metadata,
            **kwargs,
        )
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        if hasattr(
            chroma_collection._client, "max_batch_size"
        ):  # for Chroma 0.4.10 and above
            from chromadb.utils.batch_utils import create_batches

            for batch in create_batches(
                api=chroma_collection._client,
                ids=ids,
                metadatas=metadatas,  # type: ignore
                documents=texts,
            ):
                chroma_collection.add_texts(
                    texts=batch[3] if batch[3] else [],
                    metadatas=batch[2] if batch[2] else None,  # type: ignore
                    ids=batch[0],
                )
        else:
            chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids)
        return chroma_collection

    @classmethod
    def from_documents(
        cls: Type[Chroma],
        documents: List[Document],
        embedding: Optional[Embeddings] = None,
        ids: Optional[List[str]] = None,
        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
        persist_directory: Optional[str] = None,
        client_settings: Optional[chromadb.config.Settings] = None,
        client: Optional[chromadb.ClientAPI] = None,
        collection_metadata: Optional[Dict] = None,
        **kwargs: Any,
    ) -> Chroma:
        """Create a Chroma vectorstore from a list of documents.

        If a persist_directory is specified, the collection will be persisted there.
        Otherwise, the data will be ephemeral in-memory.

        Args:
            collection_name (str): Name of the collection to create.
            persist_directory (Optional[str]): Directory to persist the collection.
            ids (Optional[List[str]]): List of document IDs. Defaults to None.
            documents (List[Document]): List of documents to add to the vectorstore.
            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
            collection_metadata (Optional[Dict]): Collection configurations.
                Defaults to None.

        Returns:
            Chroma: Chroma vectorstore.
        """
        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]
        return cls.from_texts(
            texts=texts,
            embedding=embedding,
            metadatas=metadatas,
            ids=ids,
            collection_name=collection_name,
            persist_directory=persist_directory,
            client_settings=client_settings,
            client=client,
            collection_metadata=collection_metadata,
            **kwargs,
        )

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Delete by vector IDs.

        Args:
            ids: List of ids to delete.
        """
        self._collection.delete(ids=ids)

File diff suppressed because it is too large.
@@ -0,0 +1,100 @@
[tool.poetry]
name = "langchain-chroma"
version = "0.1.0rc0"
description = "An integration package connecting Chroma and LangChain"
authors = []
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
license = "MIT"

[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/chroma"

[tool.poetry.dependencies]
python = ">=3.8.1,<3.13"
langchain-core = "^0.1.40"
chromadb = { version = "^0.4.0" }
numpy = "^1"

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
pytest = "^7.3.0"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
syrupy = "^4.0.2"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
langchain-core = { path = "../../core", develop = true }
langchain-community = { path = "../../community", develop = true }

[tool.poetry.group.codespell]
optional = true

[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0"

[tool.poetry.group.test_integration]
optional = true

[tool.poetry.group.test_integration.dependencies]
langchain-openai = ">=0.0.3,<0.1"

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
ruff = "^0.1.5"

[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
langchain-core = { path = "../../core", develop = true }
langchain-community = { path = "../../community", develop = true }
types-requests = "^2.31.0.20240406"

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
langchain-core = { path = "../../core", develop = true }
langchain-community = { path = "../../community", develop = true }

[tool.ruff]
select = [
  "E",    # pycodestyle
  "F",    # pyflakes
  "I",    # isort
  "T201", # print
]

[tool.mypy]
disallow_untyped_defs = "True"

[tool.coverage.run]
omit = ["tests/*"]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# https://docs.pytest.org/en/7.1.x/reference/reference.html
# --strict-config any warnings encountered while parsing the `pytest`
# section of the configuration file raise errors.
#
# https://github.com/tophat/syrupy
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
addopts = " --strict-markers --strict-config --durations=5"
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
    "requires: mark tests as requiring a specific library",
    "asyncio: mark tests as requiring asyncio",
    "compile: mark placeholder test used to compile integration tests without running them",
]
#asyncio_mode = "auto"

@@ -0,0 +1,17 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader

if __name__ == "__main__":
    files = sys.argv[1:]
    has_failure = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            has_failure = True
            print(file)  # noqa: T201
            traceback.print_exc()
            print()  # noqa: T201

    sys.exit(1 if has_failure else 0)

@@ -0,0 +1,27 @@
#!/bin/bash
#
# This script searches for lines starting with "import pydantic" or "from pydantic"
# in tracked files within a Git repository.
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository

# Check if a path argument is provided
if [ $# -ne 1 ]; then
  echo "Usage: $0 /path/to/repository"
  exit 1
fi

repository_path="$1"

# Search for lines matching the pattern within the specified repository
result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')

# Check if any matching lines were found
if [ -n "$result" ]; then
  echo "ERROR: The following lines need to be updated:"
  echo "$result"
  echo "Please replace the code with an import from langchain_core.pydantic_v1."
  echo "For example, replace 'from pydantic import BaseModel'"
  echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
  exit 1
fi

@@ -0,0 +1,17 @@
#!/bin/bash

set -eu

# Initialize a variable to keep track of errors
errors=0

# make sure not importing from langchain or langchain_experimental
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))

# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then
  exit 1
else
  exit 0
fi

@@ -0,0 +1,82 @@
"""Fake Embedding class for testing purposes."""

import math
from typing import List

from langchain_core.embeddings import Embeddings

fake_texts = ["foo", "bar", "baz"]


class FakeEmbeddings(Embeddings):
    """Fake embeddings functionality for testing."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return simple embeddings.
        Embeddings encode each text as its index."""
        return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        return self.embed_documents(texts)

    def embed_query(self, text: str) -> List[float]:
        """Return constant query embeddings.
        Embeddings are identical to embed_documents(texts)[0].
        Distance to each text will be that text's index,
        as it was passed to embed_documents."""
        return [float(1.0)] * 9 + [float(0.0)]

    async def aembed_query(self, text: str) -> List[float]:
        return self.embed_query(text)


class ConsistentFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings which remember all the texts seen so far to return consistent
    vectors for the same texts."""

    def __init__(self, dimensionality: int = 10) -> None:
        self.known_texts: List[str] = []
        self.dimensionality = dimensionality

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return consistent embeddings for each text seen so far."""
        out_vectors = []
        for text in texts:
            if text not in self.known_texts:
                self.known_texts.append(text)
            vector = [float(1.0)] * (self.dimensionality - 1) + [
                float(self.known_texts.index(text))
            ]
            out_vectors.append(vector)
        return out_vectors

    def embed_query(self, text: str) -> List[float]:
        """Return consistent embeddings for the text, if seen before, or a constant
        one if the text is unknown."""
        return self.embed_documents([text])[0]


class AngularTwoDimensionalEmbeddings(Embeddings):
    """
    From angles (as strings in units of pi) to unit embedding vectors on a circle.
    """

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Make a list of texts into a list of embedding vectors.
        """
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """
        Convert input text to a 'vector' (list of floats).
        If the text is a number, use it as the angle for the
        unit vector in units of pi.
        Any other input text becomes the singular result [0, 0] !
        """
        try:
            angle = float(text)
            return [math.cos(angle * math.pi), math.sin(angle * math.pi)]
        except ValueError:
            # Assume it is just a test string; the value is not meaningful.
            return [0.0, 0.0]

@@ -0,0 +1,7 @@
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
    pass

@@ -0,0 +1,437 @@
"""Test Chroma functionality."""

import uuid

import chromadb
import pytest
import requests
from langchain_core.documents import Document
from langchain_core.embeddings.fake import FakeEmbeddings as Fak

from langchain_chroma.vectorstores import Chroma
from tests.integration_tests.fake_embeddings import (
    ConsistentFakeEmbeddings,
    FakeEmbeddings,
)


def test_chroma() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
    )
    output = docsearch.similarity_search("foo", k=1)

    docsearch.delete_collection()

    assert output == [Document(page_content="foo")]


async def test_chroma_async() -> None:
    """Test end to end construction and async search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
    )
    output = await docsearch.asimilarity_search("foo", k=1)

    docsearch.delete_collection()
    assert output == [Document(page_content="foo")]


def test_chroma_with_metadatas() -> None:
    """Test end to end construction and search with metadatas."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
    )
    output = docsearch.similarity_search("foo", k=1)
    docsearch.delete_collection()
    assert output == [Document(page_content="foo", metadata={"page": "0"})]


def test_chroma_with_metadatas_with_scores() -> None:
    """Test end to end construction and scored search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
    )
    output = docsearch.similarity_search_with_score("foo", k=1)
    docsearch.delete_collection()
    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]


def test_chroma_with_metadatas_with_scores_using_vector() -> None:
    """Test end to end construction and scored search, using embedding vector."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    embeddings = FakeEmbeddings()

    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=embeddings,
        metadatas=metadatas,
    )
    embedded_query = embeddings.embed_query("foo")
    output = docsearch.similarity_search_by_vector_with_relevance_scores(
        embedding=embedded_query, k=1
    )
    docsearch.delete_collection()
    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]


def test_chroma_search_filter() -> None:
    """Test end to end construction and search with metadata filtering."""
    texts = ["far", "bar", "baz"]
    metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
    )
    output1 = docsearch.similarity_search("far", k=1, filter={"first_letter": "f"})
    output2 = docsearch.similarity_search("far", k=1, filter={"first_letter": "b"})
    docsearch.delete_collection()
    assert output1 == [Document(page_content="far", metadata={"first_letter": "f"})]
    assert output2 == [Document(page_content="bar", metadata={"first_letter": "b"})]


def test_chroma_search_filter_with_scores() -> None:
    """Test end to end construction and scored search with metadata filtering."""
    texts = ["far", "bar", "baz"]
    metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
    )
    output1 = docsearch.similarity_search_with_score(
        "far", k=1, filter={"first_letter": "f"}
    )
    output2 = docsearch.similarity_search_with_score(
        "far", k=1, filter={"first_letter": "b"}
    )
    docsearch.delete_collection()
    assert output1 == [
        (Document(page_content="far", metadata={"first_letter": "f"}), 0.0)
    ]
    assert output2 == [
        (Document(page_content="bar", metadata={"first_letter": "b"}), 1.0)
    ]


def test_chroma_with_persistence() -> None:
    """Test end to end construction and search, with persistence."""
    chroma_persist_dir = "./tests/persist_dir"
    collection_name = "test_collection"
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name=collection_name,
        texts=texts,
        embedding=FakeEmbeddings(),
        persist_directory=chroma_persist_dir,
    )

    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    # Get a new VectorStore from the persisted directory
    docsearch = Chroma(
        collection_name=collection_name,
        embedding_function=FakeEmbeddings(),
        persist_directory=chroma_persist_dir,
    )
    output = docsearch.similarity_search("foo", k=1)

    # Clean up
    docsearch.delete_collection()

    # Persist doesn't need to be called again: data is automatically
    # persisted on object deletion or on program exit.


def test_chroma_mmr() -> None:
    """Test end to end construction and MMR search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
    )
    output = docsearch.max_marginal_relevance_search("foo", k=1)
    docsearch.delete_collection()
    assert output == [Document(page_content="foo")]


def test_chroma_mmr_by_vector() -> None:
    """Test end to end construction and MMR search by vector."""
    texts = ["foo", "bar", "baz"]
    embeddings = FakeEmbeddings()
    docsearch = Chroma.from_texts(
        collection_name="test_collection", texts=texts, embedding=embeddings
    )
    embedded_query = embeddings.embed_query("foo")
    output = docsearch.max_marginal_relevance_search_by_vector(embedded_query, k=1)
    docsearch.delete_collection()
    assert output == [Document(page_content="foo")]


def test_chroma_with_include_parameter() -> None:
    """Test end to end construction and include parameter."""
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
    )
    output1 = docsearch.get(include=["embeddings"])
    output2 = docsearch.get()
    docsearch.delete_collection()
    assert output1["embeddings"] is not None
    assert output2["embeddings"] is None


def test_chroma_update_document() -> None:
    """Test the update_document function in the Chroma class."""
    # Make a consistent embedding
    embedding = ConsistentFakeEmbeddings()

    # Initial document content and id
    initial_content = "foo"
    document_id = "doc1"

    # Create an instance of Document with initial content and metadata
    original_doc = Document(page_content=initial_content, metadata={"page": "0"})

    # Initialize a Chroma instance with the original document
    docsearch = Chroma.from_documents(
        collection_name="test_collection",
        documents=[original_doc],
        embedding=embedding,
        ids=[document_id],
    )
    old_embedding = docsearch._collection.peek()["embeddings"][  # type: ignore
        docsearch._collection.peek()["ids"].index(document_id)
    ]

    # Define updated content for the document
    updated_content = "updated foo"

    # Create a new Document instance with the updated content and the same id
    updated_doc = Document(page_content=updated_content, metadata={"page": "0"})

    # Update the document in the Chroma instance
    docsearch.update_document(document_id=document_id, document=updated_doc)

    # Perform a similarity search with the updated content
    output = docsearch.similarity_search(updated_content, k=1)

    # Fetch the new embedding so it can be checked below
    new_embedding = docsearch._collection.peek()["embeddings"][  # type: ignore
        docsearch._collection.peek()["ids"].index(document_id)
    ]

    docsearch.delete_collection()

    # Assert that the updated document is returned by the search
    assert output == [Document(page_content=updated_content, metadata={"page": "0"})]

    assert new_embedding == embedding.embed_documents([updated_content])[0]
    assert new_embedding != old_embedding


# TODO: RELEVANCE SCORE IS BROKEN. FIX TEST
def test_chroma_with_relevance_score() -> None:
    """Test to make sure the relevance score is scaled to 0-1."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
        collection_metadata={"hnsw:space": "l2"},
    )
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
    docsearch.delete_collection()
    assert output == [
        (Document(page_content="foo", metadata={"page": "0"}), 1.0),
        (Document(page_content="bar", metadata={"page": "1"}), 0.8),
        (Document(page_content="baz", metadata={"page": "2"}), 0.5),
    ]


# TODO: RELEVANCE SCORE IS BROKEN. FIX TEST
def test_chroma_with_relevance_score_custom_normalization_fn() -> None:
    """Test searching with relevance score and custom normalization function."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = Chroma.from_texts(
        collection_name="test1_collection",
        texts=texts,
        embedding=FakeEmbeddings(),
        metadatas=metadatas,
        relevance_score_fn=lambda d: d * 0,
        collection_metadata={"hnsw:space": "l2"},
    )
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
    docsearch.delete_collection()
    assert output == [
        (Document(page_content="foo", metadata={"page": "0"}), 0.0),
        (Document(page_content="bar", metadata={"page": "1"}), 0.0),
        (Document(page_content="baz", metadata={"page": "2"}), 0.0),
    ]


def test_init_from_client() -> None:
    import chromadb

    client = chromadb.Client(chromadb.config.Settings())
    Chroma(client=client)


def test_init_from_client_settings() -> None:
    import chromadb

    client_settings = chromadb.config.Settings()
    Chroma(client_settings=client_settings)


def test_chroma_add_documents_no_metadata() -> None:
    db = Chroma(embedding_function=FakeEmbeddings())
    db.add_documents([Document(page_content="foo")])

    db.delete_collection()


def test_chroma_add_documents_mixed_metadata() -> None:
    db = Chroma(embedding_function=FakeEmbeddings())
    docs = [
        Document(page_content="foo"),
        Document(page_content="bar", metadata={"baz": 1}),
    ]
    ids = ["0", "1"]
    actual_ids = db.add_documents(docs, ids=ids)
    search = db.similarity_search("foo bar")
    db.delete_collection()

    assert actual_ids == ids
    assert sorted(search, key=lambda d: d.page_content) == sorted(
        docs, key=lambda d: d.page_content
    )


def is_api_accessible(url: str) -> bool:
    try:
        response = requests.get(url)
        return response.status_code == 200
    except Exception:
        return False

def batch_support_chroma_version() -> bool:
    # Batching support was added in Chroma 0.4.10; compare version tuples so
    # that e.g. 0.5.0 also qualifies (a plain minor/patch check would not).
    major, minor, patch = chromadb.__version__.split(".")
    return (int(major), int(minor), int(patch)) >= (0, 4, 10)


@pytest.mark.requires("chromadb")
|
||||
@pytest.mark.skipif(
|
||||
not is_api_accessible("http://localhost:8000/api/v1/heartbeat"),
|
||||
reason="API not accessible",
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
not batch_support_chroma_version(),
|
||||
reason="ChromaDB version does not support batching",
|
||||
)
|
||||
def test_chroma_large_batch() -> None:
|
||||
client = chromadb.HttpClient()
|
||||
embedding_function = Fak(size=255)
|
||||
col = client.get_or_create_collection(
|
||||
"my_collection",
|
||||
embedding_function=embedding_function.embed_documents, # type: ignore
|
||||
)
|
||||
docs = ["This is a test document"] * (client.max_batch_size + 100)
|
||||
db = Chroma.from_texts(
|
||||
client=client,
|
||||
collection_name=col.name,
|
||||
texts=docs,
|
||||
embedding=embedding_function,
|
||||
ids=[str(uuid.uuid4()) for _ in range(len(docs))],
|
||||
)
|
||||
|
||||
db.delete_collection()
|
||||
|
||||
|
||||
@pytest.mark.requires("chromadb")
|
||||
@pytest.mark.skipif(
|
||||
not is_api_accessible("http://localhost:8000/api/v1/heartbeat"),
|
||||
reason="API not accessible",
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
not batch_support_chroma_version(),
|
||||
reason="ChromaDB version does not support batching",
|
||||
)
|
||||
def test_chroma_large_batch_update() -> None:
|
||||
client = chromadb.HttpClient()
|
||||
embedding_function = Fak(size=255)
|
||||
col = client.get_or_create_collection(
|
||||
"my_collection",
|
||||
embedding_function=embedding_function.embed_documents, # type: ignore
|
||||
)
|
||||
docs = ["This is a test document"] * (client.max_batch_size + 100)
|
||||
ids = [str(uuid.uuid4()) for _ in range(len(docs))]
|
||||
db = Chroma.from_texts(
|
||||
client=client,
|
||||
collection_name=col.name,
|
||||
texts=docs,
|
||||
embedding=embedding_function,
|
||||
ids=ids,
|
||||
)
|
||||
new_docs = [
|
||||
Document(
|
||||
page_content="This is a new test document", metadata={"doc_id": f"{i}"}
|
||||
)
|
||||
for i in range(len(docs) - 10)
|
||||
]
|
||||
new_ids = [_id for _id in ids[: len(new_docs)]]
|
||||
db.update_documents(ids=new_ids, documents=new_docs)
|
||||
|
||||
db.delete_collection()
|
||||
|
||||
|
||||
@pytest.mark.requires("chromadb")
|
||||
@pytest.mark.skipif(
|
||||
not is_api_accessible("http://localhost:8000/api/v1/heartbeat"),
|
||||
reason="API not accessible",
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
batch_support_chroma_version(), reason="ChromaDB version does not support batching"
|
||||
)
|
||||
def test_chroma_legacy_batching() -> None:
|
||||
client = chromadb.HttpClient()
|
||||
embedding_function = Fak(size=255)
|
||||
col = client.get_or_create_collection(
|
||||
"my_collection",
|
||||
embedding_function=embedding_function.embed_documents, # type: ignore
|
||||
)
|
||||
docs = ["This is a test document"] * 100
|
||||
db = Chroma.from_texts(
|
||||
client=client,
|
||||
collection_name=col.name,
|
||||
texts=docs,
|
||||
embedding=embedding_function,
|
||||
ids=[str(uuid.uuid4()) for _ in range(len(docs))],
|
||||
)
|
||||
|
||||
db.delete_collection()
|
@@ -0,0 +1,9 @@
from langchain_chroma import __all__

EXPECTED_ALL = [
    "Chroma",
]


def test_all_imports() -> None:
    assert sorted(EXPECTED_ALL) == sorted(__all__)

@@ -0,0 +1,15 @@
from langchain_core.embeddings.fake import (
    FakeEmbeddings,
)

from langchain_chroma.vectorstores import Chroma


def test_initialization() -> None:
    """Test integration vectorstore initialization."""
    texts = ["foo", "bar", "baz"]
    Chroma.from_texts(
        collection_name="test_collection",
        texts=texts,
        embedding=FakeEmbeddings(size=10),
    )