FEAT: Merge TileDB vecstore (#12811)

@@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "25bce5eb-8599-40fe-947e-4932cfae8184",
"metadata": {},
"source": [
"# TileDB\n",
"\n",
"> [TileDB](https://github.com/TileDB-Inc/TileDB) is a powerful engine for indexing and querying dense and sparse multi-dimensional arrays.\n",
"\n",
"> TileDB offers ANN search capabilities using the [TileDB-Vector-Search](https://github.com/TileDB-Inc/TileDB-Vector-Search) module. It provides serverless execution of ANN queries and storage of vector indexes both on local disk and cloud object stores (i.e. AWS S3).\n",
"\n",
"More details in:\n",
"- [Why TileDB as a Vector Database](https://tiledb.com/blog/why-tiledb-as-a-vector-database)\n",
"- [TileDB 101: Vector Search](https://tiledb.com/blog/tiledb-101-vector-search)\n",
"\n",
"This notebook shows how to use the `TileDB` vector database."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f45f46f2-7229-4859-9797-30bbead1b8e0",
"metadata": {},
"outputs": [],
"source": [
"!pip install tiledb-vector-search"
]
},
{
"cell_type": "markdown",
"id": "2f65caa9-8383-409a-bccb-6e91fc8d5e8f",
"metadata": {},
"source": [
"## Basic Example"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c96d4fe0",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings import HuggingFaceEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import TileDB\n",
"\n",
"raw_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)\n",
"embeddings = HuggingFaceEmbeddings()\n",
"db = TileDB.from_documents(\n",
" documents, embeddings, index_uri=\"/tmp/tiledb_index\", index_type=\"FLAT\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0a6797c-2bb0-45db-a636-5d2437f7a4c0",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"docs[0].page_content"
]
},
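{
"cell_type": "markdown",
"id": "b7a4cfbc-1f1d-4a0a-9c3e-8a2e5c1d9f10",
"metadata": {},
"source": [
"Since the index is persisted at `index_uri`, it can be reopened later with `TileDB.load`. A minimal sketch, assuming the index created above still exists at `/tmp/tiledb_index`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c2e1d4f-5a6b-4c7d-8e9f-0a1b2c3d4e5f",
"metadata": {},
"outputs": [],
"source": [
"# Reopen the persisted index; the embedding function must match the one used at build time.\n",
"db = TileDB.load(index_uri=\"/tmp/tiledb_index\", embedding=embeddings)\n",
"db.similarity_search(query, k=2)"
]
},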
{
"cell_type": "markdown",
"id": "c4c4e06d-6def-44ce-ac9a-4c01673c29a2",
"metadata": {},
"source": [
"### Similarity search by vector"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1eb72610-d451-4158-880c-9f0d45fa5909",
"metadata": {},
"outputs": [],
"source": [
"embedding_vector = embeddings.embed_query(query)\n",
"docs = db.similarity_search_by_vector(embedding_vector)\n",
"docs[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "d33588d4-67c2-4bd3-b251-76ae783cbafb",
"metadata": {},
"source": [
"### Similarity search with score"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a41e382-0336-4e6d-b2ef-44cc77db2696",
"metadata": {},
"outputs": [],
"source": [
"docs_and_scores = db.similarity_search_with_score(query)\n",
"docs_and_scores[0]"
]
},
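{
"cell_type": "markdown",
"id": "5e8f2a3b-4c6d-4e7f-9a0b-1c2d3e4f5a6b",
"metadata": {},
"source": [
"Scores are distances, so lower means more similar. Results can also be capped with the `score_threshold` keyword argument, which drops documents whose distance exceeds the threshold (the cutoff below is illustrative):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d0e1f2a-3b4c-4d5e-9f0a-1b2c3d4e5f6a",
"metadata": {},
"outputs": [],
"source": [
"docs_and_scores = db.similarity_search_with_score(query, k=4, score_threshold=1.5)\n",
"[(doc.page_content[:50], score) for doc, score in docs_and_scores]"
]
},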
{
"cell_type": "markdown",
"id": "57f930f2-41a0-4795-ad9e-44a33c8f88ec",
"metadata": {},
"source": [
"## Maximal Marginal Relevance Search (MMR)"
]
},
{
"cell_type": "markdown",
"id": "4790e437-3207-45cb-b121-d857ab5aabd8",
"metadata": {},
"source": [
"In addition to using similarity search in the retriever object, you can also use `mmr` as retriever."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "495754b1-5cdb-4af6-9733-f68700bb7232",
"metadata": {},
"outputs": [],
"source": [
"retriever = db.as_retriever(search_type=\"mmr\")\n",
"retriever.get_relevant_documents(query)"
]
},
{
"cell_type": "markdown",
"id": "e213d957-e439-4bd6-90f2-8909323f5f09",
"metadata": {},
"source": [
"Or use `max_marginal_relevance_search` directly:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99d928d0-3b79-4588-925e-32230e12af47",
"metadata": {},
"outputs": [],
"source": [
"db.max_marginal_relevance_search(query, k=2, fetch_k=10)"
]
}
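,
{
"cell_type": "markdown",
"id": "0f1a2b3c-4d5e-4f6a-8b9c-0d1e2f3a4b5c",
"metadata": {},
"source": [
"## Updating and deleting documents\n",
"\n",
"The store also supports incremental updates: `add_texts` writes new vectors and documents, `delete` removes them by id, and `consolidate_updates` merges the accumulated changes for faster queries. A minimal sketch, assuming the `db` built above (the id is illustrative; ids must be numeric strings since they are stored as uint64):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6a7b8c9d-0e1f-4a2b-8c3d-4e5f6a7b8c9d",
"metadata": {},
"outputs": [],
"source": [
"new_ids = db.add_texts([\"a brand new document\"], ids=[\"100\"])\n",
"print(db.similarity_search(\"a brand new document\", k=1))\n",
"db.delete(new_ids)\n",
"db.consolidate_updates()"
]
}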
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@@ -350,6 +350,12 @@ def _import_tencentvectordb() -> Any:
return TencentVectorDB
def _import_tiledb() -> Any:
from langchain.vectorstores.tiledb import TileDB
return TileDB
def _import_tigris() -> Any:
from langchain.vectorstores.tigris import Tigris
@@ -517,6 +523,8 @@ def __getattr__(name: str) -> Any:
return _import_tair()
elif name == "TencentVectorDB":
return _import_tencentvectordb()
elif name == "TileDB":
return _import_tiledb()
elif name == "Tigris":
return _import_tigris()
elif name == "TimescaleVector":
@@ -594,6 +602,7 @@ __all__ = [
"StarRocks",
"SupabaseVectorStore",
"Tair",
"TileDB",
"Tigris",
"TimescaleVector",
"Typesense",

@@ -0,0 +1,789 @@
"""Wrapper around TileDB vector database."""
from __future__ import annotations
import pickle
import random
import sys
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
INDEX_METRICS = frozenset(["euclidean"])
DEFAULT_METRIC = "euclidean"
DOCUMENTS_ARRAY_NAME = "documents"
VECTOR_INDEX_NAME = "vectors"
MAX_UINT64 = np.iinfo(np.dtype("uint64")).max
MAX_FLOAT_32 = np.finfo(np.dtype("float32")).max
MAX_FLOAT = sys.float_info.max
def dependable_tiledb_import() -> Any:
"""Import tiledb-vector-search if available, otherwise raise error."""
try:
import tiledb
import tiledb.vector_search as tiledb_vs
except ImportError:
raise ValueError(
"Could not import tiledb-vector-search python package. "
"Please install it with `conda install -c tiledb tiledb-vector-search` "
"or `pip install tiledb-vector-search`"
)
return tiledb_vs, tiledb
def get_vector_index_uri_from_group(group: Any) -> str:
return group[VECTOR_INDEX_NAME].uri
def get_documents_array_uri_from_group(group: Any) -> str:
return group[DOCUMENTS_ARRAY_NAME].uri
def get_vector_index_uri(uri: str) -> str:
return f"{uri}/{VECTOR_INDEX_NAME}"
def get_documents_array_uri(uri: str) -> str:
return f"{uri}/{DOCUMENTS_ARRAY_NAME}"
class TileDB(VectorStore):
"""Wrapper around TileDB vector database.
To use, you should have the ``tiledb-vector-search`` python package installed.
Example:
.. code-block:: python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import TileDB
embeddings = OpenAIEmbeddings()
db = TileDB(embeddings, index_uri, metric)
"""
def __init__(
self,
embedding: Embeddings,
index_uri: str,
metric: str,
*,
vector_index_uri: str = "",
docs_array_uri: str = "",
config: Optional[Mapping[str, Any]] = None,
timestamp: Any = None,
**kwargs: Any,
):
"""Initialize with necessary components."""
self.embedding = embedding
self.embedding_function = embedding.embed_query
self.index_uri = index_uri
self.metric = metric
self.config = config
tiledb_vs, tiledb = dependable_tiledb_import()
with tiledb.scope_ctx(ctx_or_config=config):
index_group = tiledb.Group(self.index_uri, "r")
self.vector_index_uri = (
vector_index_uri
if vector_index_uri != ""
else get_vector_index_uri_from_group(index_group)
)
self.docs_array_uri = (
docs_array_uri
if docs_array_uri != ""
else get_documents_array_uri_from_group(index_group)
)
index_group.close()
group = tiledb.Group(self.vector_index_uri, "r")
self.index_type = group.meta.get("index_type")
group.close()
self.timestamp = timestamp
if self.index_type == "FLAT":
self.vector_index = tiledb_vs.flat_index.FlatIndex(
uri=self.vector_index_uri,
config=self.config,
timestamp=self.timestamp,
**kwargs,
)
elif self.index_type == "IVF_FLAT":
self.vector_index = tiledb_vs.ivf_flat_index.IVFFlatIndex(
uri=self.vector_index_uri,
config=self.config,
timestamp=self.timestamp,
**kwargs,
)
@property
def embeddings(self) -> Optional[Embeddings]:
return self.embedding
def process_index_results(
self,
ids: List[int],
scores: List[float],
*,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
score_threshold: float = MAX_FLOAT,
) -> List[Tuple[Document, float]]:
"""Turns TileDB results into a list of documents and scores.
Args:
ids: List of indices of the documents in the index.
scores: List of distances of the documents in the index.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
score_threshold: Optional, a floating point value to filter the
resulting set of retrieved docs
Returns:
List of Documents and scores.
"""
tiledb_vs, tiledb = dependable_tiledb_import()
docs = []
docs_array = tiledb.open(
self.docs_array_uri, "r", timestamp=self.timestamp, config=self.config
)
for idx, score in zip(ids, scores):
if idx == 0 and score == 0:
continue
if idx == MAX_UINT64 and score == MAX_FLOAT_32:
continue
doc = docs_array[idx]
if doc is None or len(doc["text"]) == 0:
raise ValueError(f"Could not find document for id {idx}, got {doc}")
pickled_metadata = doc.get("metadata")
result_doc = Document(page_content=str(doc["text"][0]))
if pickled_metadata is not None:
metadata = pickle.loads(
np.array(pickled_metadata.tolist()).astype(np.uint8).tobytes()
)
result_doc.metadata = metadata
if filter is not None:
filter = {
key: [value] if not isinstance(value, list) else value
for key, value in filter.items()
}
if all(
result_doc.metadata.get(key) in value
for key, value in filter.items()
):
docs.append((result_doc, score))
else:
docs.append((result_doc, score))
docs_array.close()
docs = [(doc, score) for doc, score in docs if score <= score_threshold]
return docs[:k]
def similarity_search_with_score_by_vector(
self,
embedding: List[float],
*,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
fetch_k: int = 20,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query.
Args:
embedding: Embedding vector to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20.
**kwargs: kwargs to be passed to similarity search. Can include:
nprobe: Optional, number of partitions to check if using IVF_FLAT index
score_threshold: Optional, a floating point value to filter the
resulting set of retrieved docs
Returns:
List of documents most similar to the query text and distance
in float for each. A lower score represents higher similarity.
"""
if "score_threshold" in kwargs:
score_threshold = kwargs.pop("score_threshold")
else:
score_threshold = MAX_FLOAT
d, i = self.vector_index.query(
np.array([embedding], dtype=np.float32),
k=k if filter is None else fetch_k,
**kwargs,
)
return self.process_index_results(
ids=i[0], scores=d[0], filter=filter, k=k, score_threshold=score_threshold
)
def similarity_search_with_score(
self,
query: str,
*,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
fetch_k: int = 20,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20.
Returns:
List of documents most similar to the query text and distance
in float for each. A lower score represents higher similarity.
"""
embedding = self.embedding_function(query)
docs = self.similarity_search_with_score_by_vector(
embedding,
k=k,
filter=filter,
fetch_k=fetch_k,
**kwargs,
)
return docs
def similarity_search_by_vector(
self,
embedding: List[float],
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
fetch_k: int = 20,
**kwargs: Any,
) -> List[Document]:
"""Return docs most similar to embedding vector.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20.
Returns:
List of Documents most similar to the embedding.
"""
docs_and_scores = self.similarity_search_with_score_by_vector(
embedding,
k=k,
filter=filter,
fetch_k=fetch_k,
**kwargs,
)
return [doc for doc, _ in docs_and_scores]
def similarity_search(
self,
query: str,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
fetch_k: int = 20,
**kwargs: Any,
) -> List[Document]:
"""Return docs most similar to query.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20.
Returns:
List of Documents most similar to the query.
"""
docs_and_scores = self.similarity_search_with_score(
query, k=k, filter=filter, fetch_k=fetch_k, **kwargs
)
return [doc for doc, _ in docs_and_scores]
def max_marginal_relevance_search_with_score_by_vector(
self,
embedding: List[float],
*,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and their similarity scores selected using the maximal marginal
relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch before filtering to
pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents and similarity scores selected by maximal marginal
relevance and score for each.
"""
if "score_threshold" in kwargs:
score_threshold = kwargs.pop("score_threshold")
else:
score_threshold = MAX_FLOAT
scores, indices = self.vector_index.query(
np.array([embedding], dtype=np.float32),
k=fetch_k if filter is None else fetch_k * 2,
**kwargs,
)
results = self.process_index_results(
ids=indices[0],
scores=scores[0],
filter=filter,
k=fetch_k if filter is None else fetch_k * 2,
score_threshold=score_threshold,
)
embeddings = [
self.embedding.embed_documents([doc.page_content])[0] for doc, _ in results
]
mmr_selected = maximal_marginal_relevance(
np.array([embedding], dtype=np.float32),
embeddings,
k=k,
lambda_mult=lambda_mult,
)
return [results[i] for i in mmr_selected]
def max_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch before filtering to
pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
docs_and_scores = self.max_marginal_relevance_search_with_score_by_vector(
embedding,
k=k,
fetch_k=fetch_k,
lambda_mult=lambda_mult,
filter=filter,
**kwargs,
)
return [doc for doc, _ in docs_and_scores]
def max_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch before filtering (if needed) to
pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
embedding = self.embedding_function(query)
docs = self.max_marginal_relevance_search_by_vector(
embedding,
k=k,
fetch_k=fetch_k,
lambda_mult=lambda_mult,
filter=filter,
**kwargs,
)
return docs
@classmethod
def create(
cls,
index_uri: str,
index_type: str,
dimensions: int,
vector_type: np.dtype,
*,
metadatas: bool = True,
config: Optional[Mapping[str, Any]] = None,
) -> None:
"""Create a TileDB group with a vector index and a documents array.
Args:
index_uri: The URI to create the TileDB group at.
index_type: Vector index type ("FLAT" or "IVF_FLAT").
dimensions: Number of dimensions of the vectors to be indexed.
vector_type: Numpy dtype of the vectors.
metadatas: Whether to create an attribute for Document metadata.
config: Optional, TileDB config.
"""
tiledb_vs, tiledb = dependable_tiledb_import()
with tiledb.scope_ctx(ctx_or_config=config):
tiledb.group_create(index_uri)
group = tiledb.Group(index_uri, "w")
vector_index_uri = get_vector_index_uri(group.uri)
docs_uri = get_documents_array_uri(group.uri)
if index_type == "FLAT":
tiledb_vs.flat_index.create(
uri=vector_index_uri,
dimensions=dimensions,
vector_type=vector_type,
config=config,
)
elif index_type == "IVF_FLAT":
tiledb_vs.ivf_flat_index.create(
uri=vector_index_uri,
dimensions=dimensions,
vector_type=vector_type,
config=config,
)
group.add(vector_index_uri, name=VECTOR_INDEX_NAME)
# Create TileDB array to store Documents
# TODO add a Document store API to tiledb-vector-search to allow storing
# different types of objects and metadata in a more generic way.
dim = tiledb.Dim(
name="id",
domain=(0, MAX_UINT64 - 1),
dtype=np.dtype(np.uint64),
)
dom = tiledb.Domain(dim)
text_attr = tiledb.Attr(name="text", dtype=np.dtype("U1"), var=True)
attrs = [text_attr]
if metadatas:
metadata_attr = tiledb.Attr(name="metadata", dtype=np.uint8, var=True)
attrs.append(metadata_attr)
schema = tiledb.ArraySchema(
domain=dom,
sparse=True,
allows_duplicates=False,
attrs=attrs,
)
tiledb.Array.create(docs_uri, schema)
group.add(docs_uri, name=DOCUMENTS_ARRAY_NAME)
group.close()
@classmethod
def __from(
cls,
texts: List[str],
embeddings: List[List[float]],
embedding: Embeddings,
index_uri: str,
*,
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
metric: str = DEFAULT_METRIC,
index_type: str = "FLAT",
config: Optional[Mapping[str, Any]] = None,
index_timestamp: int = 0,
**kwargs: Any,
) -> TileDB:
if metric not in INDEX_METRICS:
raise ValueError(
(
f"Unsupported distance metric: {metric}. "
f"Expected one of {list(INDEX_METRICS)}"
)
)
tiledb_vs, tiledb = dependable_tiledb_import()
if not embeddings:
raise ValueError("embeddings must be provided to build a TileDB index")
input_vectors = np.array(embeddings).astype(np.float32)
cls.create(
index_uri=index_uri,
index_type=index_type,
dimensions=input_vectors.shape[1],
vector_type=input_vectors.dtype,
metadatas=metadatas is not None,
config=config,
)
with tiledb.scope_ctx(ctx_or_config=config):
vector_index_uri = get_vector_index_uri(index_uri)
docs_uri = get_documents_array_uri(index_uri)
if ids is None:
ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts]
external_ids = np.array(ids).astype(np.uint64)
tiledb_vs.ingestion.ingest(
index_type=index_type,
index_uri=vector_index_uri,
input_vectors=input_vectors,
external_ids=external_ids,
index_timestamp=index_timestamp if index_timestamp != 0 else None,
config=config,
**kwargs,
)
with tiledb.open(docs_uri, "w") as A:
data = {"text": np.array(texts)}
if metadatas is not None:
metadata_attr = np.empty([len(metadatas)], dtype=object)
for i, metadata in enumerate(metadatas):
metadata_attr[i] = np.frombuffer(
pickle.dumps(metadata), dtype=np.uint8
)
data["metadata"] = metadata_attr
A[external_ids] = data
return cls(
embedding=embedding,
index_uri=index_uri,
metric=metric,
config=config,
**kwargs,
)
def delete(
self, ids: Optional[List[str]] = None, timestamp: int = 0, **kwargs: Any
) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
timestamp: Optional timestamp to delete with.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
if ids is None:
raise ValueError("ids must be provided to delete")
external_ids = np.array(ids).astype(np.uint64)
self.vector_index.delete_batch(
external_ids=external_ids, timestamp=timestamp if timestamp != 0 else None
)
return True
def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
timestamp: int = 0,
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
ids: Optional ids of each text object.
timestamp: Optional timestamp to write new texts with.
kwargs: vectorstore specific parameters
Returns:
List of ids from adding the texts into the vectorstore.
"""
tiledb_vs, tiledb = dependable_tiledb_import()
texts = list(texts)
embeddings = self.embedding.embed_documents(texts)
if ids is None:
ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts]
external_ids = np.array(ids).astype(np.uint64)
vectors = np.empty((len(embeddings)), dtype="O")
for i in range(len(embeddings)):
vectors[i] = np.array(embeddings[i], dtype=np.float32)
self.vector_index.update_batch(
vectors=vectors,
external_ids=external_ids,
timestamp=timestamp if timestamp != 0 else None,
)
docs = {"text": np.array(texts)}
if metadatas is not None:
metadata_attr = np.empty([len(metadatas)], dtype=object)
for i, metadata in enumerate(metadatas):
metadata_attr[i] = np.frombuffer(pickle.dumps(metadata), dtype=np.uint8)
docs["metadata"] = metadata_attr
docs_array = tiledb.open(
self.docs_array_uri,
"w",
timestamp=timestamp if timestamp != 0 else None,
config=self.config,
)
docs_array[external_ids] = docs
docs_array.close()
return ids
@classmethod
def from_texts(
cls,
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
metric: str = DEFAULT_METRIC,
index_uri: str = "/tmp/tiledb_array",
index_type: str = "FLAT",
config: Optional[Mapping[str, Any]] = None,
index_timestamp: int = 0,
**kwargs: Any,
) -> TileDB:
"""Construct a TileDB index from raw documents.
Args:
texts: List of documents to index.
embedding: Embedding function to use.
metadatas: List of metadata dictionaries to associate with documents.
ids: Optional ids of each text object.
metric: Metric to use for indexing. Defaults to "euclidean".
index_uri: The URI to write the TileDB arrays
index_type: Optional, Vector index type ("FLAT", "IVF_FLAT")
config: Optional, TileDB config
index_timestamp: Optional, timestamp to write new texts with.
Example:
.. code-block:: python
from langchain.vectorstores import TileDB
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
index = TileDB.from_texts(texts, embeddings)
"""
embeddings = embedding.embed_documents(texts)
return cls.__from(
texts=texts,
embeddings=embeddings,
embedding=embedding,
metadatas=metadatas,
ids=ids,
metric=metric,
index_uri=index_uri,
index_type=index_type,
config=config,
index_timestamp=index_timestamp,
**kwargs,
)
@classmethod
def from_embeddings(
cls,
text_embeddings: List[Tuple[str, List[float]]],
embedding: Embeddings,
index_uri: str,
*,
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
metric: str = DEFAULT_METRIC,
index_type: str = "FLAT",
config: Optional[Mapping[str, Any]] = None,
index_timestamp: int = 0,
**kwargs: Any,
) -> TileDB:
"""Construct TileDB index from embeddings.
Args:
text_embeddings: List of tuples of (text, embedding)
embedding: Embedding function to use.
index_uri: The URI to write the TileDB arrays
metadatas: List of metadata dictionaries to associate with documents.
metric: Optional, Metric to use for indexing. Defaults to "euclidean".
index_type: Optional, Vector index type ("FLAT", "IVF_FLAT")
config: Optional, TileDB config
index_timestamp: Optional, timestamp to write new texts with.
Example:
.. code-block:: python
from langchain.vectorstores import TileDB
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
text_embeddings = embeddings.embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
db = TileDB.from_embeddings(text_embedding_pairs, embeddings)
"""
texts = [t[0] for t in text_embeddings]
embeddings = [t[1] for t in text_embeddings]
return cls.__from(
texts=texts,
embeddings=embeddings,
embedding=embedding,
metadatas=metadatas,
ids=ids,
metric=metric,
index_uri=index_uri,
index_type=index_type,
config=config,
index_timestamp=index_timestamp,
**kwargs,
)
@classmethod
def load(
cls,
index_uri: str,
embedding: Embeddings,
*,
metric: str = DEFAULT_METRIC,
config: Optional[Mapping[str, Any]] = None,
timestamp: Any = None,
**kwargs: Any,
) -> TileDB:
"""Load a TileDB index from a URI.
Args:
index_uri: The URI of the TileDB vector index.
embedding: Embeddings to use when generating queries.
metric: Optional, Metric to use for indexing. Defaults to "euclidean".
config: Optional, TileDB config
timestamp: Optional, timestamp to use for opening the arrays.
"""
return cls(
embedding=embedding,
index_uri=index_uri,
metric=metric,
config=config,
timestamp=timestamp,
**kwargs,
)
def consolidate_updates(self, **kwargs: Any) -> None:
"""Consolidate index updates for better search performance."""
self.vector_index = self.vector_index.consolidate_updates(**kwargs)
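
For reference, the index created by this wrapper is a TileDB group with two members, named by
the VECTOR_INDEX_NAME and DOCUMENTS_ARRAY_NAME constants above. A minimal sketch of inspecting
such a group, assuming an index already exists at /tmp/tiledb_index:

import tiledb

# Open the group written by TileDB.create()/from_texts() and print its member URIs.
with tiledb.scope_ctx():
    group = tiledb.Group("/tmp/tiledb_index", "r")
    print(group["vectors"].uri)  # URI of the vector index
    print(group["documents"].uri)  # URI of the documents array
    group.close()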

@@ -0,0 +1,358 @@
from pathlib import Path
import numpy as np
import pytest
from langchain.docstore.document import Document
from langchain.vectorstores.tiledb import TileDB
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
FakeEmbeddings,
)
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb(tmp_path: Path) -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
output = docsearch.similarity_search(
"foo", k=1, nprobe=docsearch.vector_index.partitions
)
assert output == [Document(page_content="foo")]
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_vector_sim(tmp_path: Path) -> None:
"""Test vector similarity."""
texts = ["foo", "bar", "baz"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_by_vector(query_vec, k=1)
assert output == [Document(page_content="foo")]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_by_vector(
query_vec, k=1, nprobe=docsearch.vector_index.partitions
)
assert output == [Document(page_content="foo")]
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_vector_sim_with_score_threshold(tmp_path: Path) -> None:
"""Test vector similarity."""
texts = ["foo", "bar", "baz"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_by_vector(query_vec, k=2, score_threshold=0.2)
assert output == [Document(page_content="foo")]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_by_vector(
query_vec, k=2, score_threshold=0.2, nprobe=docsearch.vector_index.partitions
)
assert output == [Document(page_content="foo")]
@pytest.mark.requires("tiledb-vector-search")
def test_similarity_search_with_score_by_vector(tmp_path: Path) -> None:
"""Test vector similarity with score by vector."""
texts = ["foo", "bar", "baz"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_with_score_by_vector(query_vec, k=1)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo")
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_with_score_by_vector(
query_vec, k=1, nprobe=docsearch.vector_index.partitions
)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo")
@pytest.mark.requires("tiledb-vector-search")
def test_similarity_search_with_score_by_vector_with_score_threshold(
tmp_path: Path,
) -> None:
"""Test vector similarity with score by vector."""
texts = ["foo", "bar", "baz"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_with_score_by_vector(
query_vec,
k=2,
score_threshold=0.2,
)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo")
assert output[0][1] < 0.2
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = FakeEmbeddings().embed_query(text="foo")
output = docsearch.similarity_search_with_score_by_vector(
query_vec, k=2, score_threshold=0.2, nprobe=docsearch.vector_index.partitions
)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo")
assert output[0][1] < 0.2
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_mmr(tmp_path: Path) -> None:
texts = ["foo", "foo", "fou", "foy"]
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec, k=3, lambda_mult=0.1
)
assert output[0][0] == Document(page_content="foo")
assert output[0][1] == 0.0
assert output[1][0] != Document(page_content="foo")
assert output[2][0] != Document(page_content="foo")
docsearch = TileDB.from_texts(
texts=texts,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec, k=3, lambda_mult=0.1, nprobe=docsearch.vector_index.partitions
)
assert output[0][0] == Document(page_content="foo")
assert output[0][1] == 0.0
assert output[1][0] != Document(page_content="foo")
assert output[2][0] != Document(page_content="foo")
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_mmr_with_metadatas_and_filter(tmp_path: Path) -> None:
texts = ["foo", "foo", "fou", "foy"]
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = TileDB.from_texts(
texts=texts,
metadatas=metadatas,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec, k=3, lambda_mult=0.1, filter={"page": 1}
)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
assert output[0][1] == 0.0
docsearch = TileDB.from_texts(
texts=texts,
metadatas=metadatas,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec,
k=3,
lambda_mult=0.1,
filter={"page": 1},
nprobe=docsearch.vector_index.partitions,
)
assert len(output) == 1
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
assert output[0][1] == 0.0
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_mmr_with_metadatas_and_list_filter(tmp_path: Path) -> None:
texts = ["foo", "fou", "foy", "foo"]
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = TileDB.from_texts(
texts=texts,
metadatas=metadatas,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/flat",
index_type="FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec, k=3, lambda_mult=0.1, filter={"page": [0, 1, 2]}
)
assert len(output) == 3
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
assert output[0][1] == 0.0
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
assert output[2][0] != Document(page_content="foo", metadata={"page": 0})
docsearch = TileDB.from_texts(
texts=texts,
metadatas=metadatas,
embedding=ConsistentFakeEmbeddings(),
index_uri=f"{str(tmp_path)}/ivf_flat",
index_type="IVF_FLAT",
)
query_vec = ConsistentFakeEmbeddings().embed_query(text="foo")
output = docsearch.max_marginal_relevance_search_with_score_by_vector(
query_vec,
k=3,
lambda_mult=0.1,
filter={"page": [0, 1, 2]},
nprobe=docsearch.vector_index.partitions,
)
assert len(output) == 3
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
assert output[0][1] == 0.0
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
assert output[2][0] != Document(page_content="foo", metadata={"page": 0})
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_flat_updates(tmp_path: Path) -> None:
"""Test end to end construction and search."""
dimensions = 10
index_uri = str(tmp_path)
embedding = ConsistentFakeEmbeddings(dimensionality=dimensions)
TileDB.create(
index_uri=index_uri,
index_type="FLAT",
dimensions=dimensions,
vector_type=np.dtype("float32"),
metadatas=False,
)
docsearch = TileDB.load(
index_uri=index_uri,
embedding=embedding,
)
output = docsearch.similarity_search("foo", k=2)
assert output == []
docsearch.add_texts(texts=["foo", "bar", "baz"], ids=["1", "2", "3"])
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
docsearch.delete(["1", "3"])
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="bar")]
output = docsearch.similarity_search("baz", k=1)
assert output == [Document(page_content="bar")]
docsearch.add_texts(texts=["fooo", "bazz"], ids=["4", "5"])
output = docsearch.similarity_search("fooo", k=1)
assert output == [Document(page_content="fooo")]
output = docsearch.similarity_search("bazz", k=1)
assert output == [Document(page_content="bazz")]
docsearch.consolidate_updates()
output = docsearch.similarity_search("fooo", k=1)
assert output == [Document(page_content="fooo")]
output = docsearch.similarity_search("bazz", k=1)
assert output == [Document(page_content="bazz")]
@pytest.mark.requires("tiledb-vector-search")
def test_tiledb_ivf_flat_updates(tmp_path: Path) -> None:
"""Test end to end construction and search."""
dimensions = 10
index_uri = str(tmp_path)
embedding = ConsistentFakeEmbeddings(dimensionality=dimensions)
TileDB.create(
index_uri=index_uri,
index_type="IVF_FLAT",
dimensions=dimensions,
vector_type=np.dtype("float32"),
metadatas=False,
)
docsearch = TileDB.load(
index_uri=index_uri,
embedding=embedding,
)
output = docsearch.similarity_search("foo", k=2)
assert output == []
docsearch.add_texts(texts=["foo", "bar", "baz"], ids=["1", "2", "3"])
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
docsearch.delete(["1", "3"])
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="bar")]
output = docsearch.similarity_search("baz", k=1)
assert output == [Document(page_content="bar")]
docsearch.add_texts(texts=["fooo", "bazz"], ids=["4", "5"])
output = docsearch.similarity_search("fooo", k=1)
assert output == [Document(page_content="fooo")]
output = docsearch.similarity_search("bazz", k=1)
assert output == [Document(page_content="bazz")]
docsearch.consolidate_updates()
output = docsearch.similarity_search("fooo", k=1)
assert output == [Document(page_content="fooo")]
output = docsearch.similarity_search("bazz", k=1)
assert output == [Document(page_content="bazz")]

@@ -1145,6 +1145,7 @@ def test_compatible_vectorstore_documentation() -> None:
"ScaNN",
"SemaDB",
"SupabaseVectorStore",
"TileDB",
"TimescaleVector",
"Vald",
"Vearch",

@@ -52,6 +52,7 @@ _EXPECTED = [
"StarRocks",
"SupabaseVectorStore",
"Tair",
"TileDB",
"Tigris",
"TimescaleVector",
"Typesense",
