Fix SupabaseVectorStore write operation timeout (#12318)

**Description** This small change will make chunk_size a configurable parameter for loading documents into a Supabase database. **Issue** https://github.com/langchain-ai/langchain/issues/11422 **Dependencies** No changes **Twitter** @ j1philli **Reminder** If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --------- Co-authored-by: Greg Richardson <greg.nmr@gmail.com>
9 months ago · 01c5cd365b
parent b10cefb160
commit 01c5cd365b
3 changed files with 15 additions and 8 deletions
--- a/docs/docs/integrations/vectorstores/supabase.ipynb
+++ b/docs/docs/integrations/vectorstores/supabase.ipynb
@@ -197,7 +197,7 @@
   "id": "5abb9b93",
   "metadata": {},
   "source": [
-    "Insert the above documents into the database. Embeddings will automatically be generated for each document."
+    "Insert the above documents into the database. Embeddings will automatically be generated for each document. You can adjust the chunk_size based on the number of documents you have. The default is 500, but lowering it may be necessary."
   ]
  },
  {
@@ -208,7 +208,7 @@
   "outputs": [],
   "source": [
    "\n",
-    "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
+    "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\", chunk_size=500)"
   ]
  },
  {
--- a/libs/langchain/langchain/vectorstores/supabase.py
+++ b/libs/langchain/langchain/vectorstores/supabase.py
@@ -61,6 +61,7 @@ class SupabaseVectorStore(VectorStore):
            client=supabase_client,
            table_name="documents",
            query_name="match_documents",
+            chunk_size=500,
        )

    To load from an existing table:
@@ -88,6 +89,7 @@ class SupabaseVectorStore(VectorStore):
        client: supabase.client.Client,
        embedding: Embeddings,
        table_name: str,
+        chunk_size: int = 500,
        query_name: Union[str, None] = None,
    ) -> None:
        """Initialize with supabase client."""
@@ -103,6 +105,9 @@ class SupabaseVectorStore(VectorStore):
        self._embedding: Embeddings = embedding
        self.table_name = table_name or "documents"
        self.query_name = query_name or "match_documents"
+        self.chunk_size = chunk_size or 500
+        # According to the SupabaseVectorStore JS implementation, the best chunk size
+        # is 500. That can be too large for big datasets, so it is configurable.

    @property
    def embeddings(self) -> Embeddings:
@@ -130,6 +135,7 @@ class SupabaseVectorStore(VectorStore):
        client: Optional[supabase.client.Client] = None,
        table_name: Optional[str] = "documents",
        query_name: Union[str, None] = "match_documents",
+        chunk_size: int = 500,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> "SupabaseVectorStore":
@@ -144,13 +150,14 @@ class SupabaseVectorStore(VectorStore):
        embeddings = embedding.embed_documents(texts)
        ids = [str(uuid.uuid4()) for _ in texts]
        docs = cls._texts_to_documents(texts, metadatas)
-        cls._add_vectors(client, table_name, embeddings, docs, ids)
+        cls._add_vectors(client, table_name, embeddings, docs, ids, chunk_size)

        return cls(
            client=client,
            embedding=embedding,
            table_name=table_name,
            query_name=query_name,
+            chunk_size=chunk_size,
        )

    def add_vectors(
@@ -159,7 +166,9 @@ class SupabaseVectorStore(VectorStore):
        documents: List[Document],
        ids: List[str],
    ) -> List[str]:
-        return self._add_vectors(self._client, self.table_name, vectors, documents, ids)
+        return self._add_vectors(
+            self._client, self.table_name, vectors, documents, ids, self.chunk_size
+        )

    def similarity_search(
        self,
@@ -300,6 +309,7 @@ class SupabaseVectorStore(VectorStore):
        vectors: List[List[float]],
        documents: List[Document],
        ids: List[str],
+        chunk_size: int,
    ) -> List[str]:
        """Add vectors to Supabase table."""

@@ -313,9 +323,6 @@ class SupabaseVectorStore(VectorStore):
            for idx, embedding in enumerate(vectors)
        ]

-        # According to the SupabaseVectorStore JS implementation, the best chunk size
-        # is 500
-        chunk_size = 500
        id_list: List[str] = []
        for i in range(0, len(rows), chunk_size):
            chunk = rows[i : i + chunk_size]
--- a/libs/langchain/tests/integration_tests/examples/hello_world.py
+++ b/libs/langchain/tests/integration_tests/examples/hello_world.py
@@ -3,7 +3,7 @@
 import sys


-def main():
+def main() -> int:
    print("Hello World!")

    return 0