From 01c5cd365b46b768de6bddbacc3afbf5aaab2294 Mon Sep 17 00:00:00 2001
From: Josh Phillips <jphilli1996@gmail.com>
Date: Thu, 26 Oct 2023 15:19:17 -0600
Subject: [PATCH] Fix SupabaseVectorStore write operation timeout (#12318)

**Description**
This small change will make chunk_size a configurable parameter for
loading documents into a Supabase database.

**Issue**
https://github.com/langchain-ai/langchain/issues/11422

**Dependencies**
No changes

**Twitter**
@j1philli

**Reminder**
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.

---------

Co-authored-by: Greg Richardson <greg.nmr@gmail.com>
---
 .../integrations/vectorstores/supabase.ipynb    |  4 ++--
 .../langchain/vectorstores/supabase.py          | 17 ++++++++++++-----
 .../integration_tests/examples/hello_world.py   |  2 +-
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/docs/docs/integrations/vectorstores/supabase.ipynb b/docs/docs/integrations/vectorstores/supabase.ipynb
index 24008d9a71..033a5d381f 100644
--- a/docs/docs/integrations/vectorstores/supabase.ipynb
+++ b/docs/docs/integrations/vectorstores/supabase.ipynb
@@ -197,7 +197,7 @@
    "id": "5abb9b93",
    "metadata": {},
    "source": [
-    "Insert the above documents into the database. Embeddings will automatically be generated for each document."
+    "Insert the above documents into the database. Embeddings will automatically be generated for each document. You can adjust the chunk_size based on the amount of documents you have. The default is 500 but lowering it may be necessary."
    ]
   },
   {
@@ -208,7 +208,7 @@
    "outputs": [],
    "source": [
     "\n",
-    "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
+    "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\", chunk_size=500)"
    ]
   },
   {
diff --git a/libs/langchain/langchain/vectorstores/supabase.py b/libs/langchain/langchain/vectorstores/supabase.py
index 2cb04385ae..f5dbdad9af 100644
--- a/libs/langchain/langchain/vectorstores/supabase.py
+++ b/libs/langchain/langchain/vectorstores/supabase.py
@@ -61,6 +61,7 @@ class SupabaseVectorStore(VectorStore):
             client=supabase_client,
             table_name="documents",
             query_name="match_documents",
+            chunk_size=500,
         )
 
     To load from an existing table:
@@ -88,6 +89,7 @@ class SupabaseVectorStore(VectorStore):
         client: supabase.client.Client,
         embedding: Embeddings,
         table_name: str,
+        chunk_size: int = 500,
         query_name: Union[str, None] = None,
     ) -> None:
         """Initialize with supabase client."""
@@ -103,6 +105,9 @@ class SupabaseVectorStore(VectorStore):
         self._embedding: Embeddings = embedding
         self.table_name = table_name or "documents"
         self.query_name = query_name or "match_documents"
+        self.chunk_size = chunk_size or 500
+        # According to the SupabaseVectorStore JS implementation, the best chunk size
+        # is 500. Though for large datasets it can be too large so it is configurable.
 
     @property
     def embeddings(self) -> Embeddings:
@@ -130,6 +135,7 @@ class SupabaseVectorStore(VectorStore):
         client: Optional[supabase.client.Client] = None,
         table_name: Optional[str] = "documents",
         query_name: Union[str, None] = "match_documents",
+        chunk_size: int = 500,
         ids: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> "SupabaseVectorStore":
@@ -144,13 +150,14 @@ class SupabaseVectorStore(VectorStore):
         embeddings = embedding.embed_documents(texts)
         ids = [str(uuid.uuid4()) for _ in texts]
         docs = cls._texts_to_documents(texts, metadatas)
-        cls._add_vectors(client, table_name, embeddings, docs, ids)
+        cls._add_vectors(client, table_name, embeddings, docs, ids, chunk_size)
 
         return cls(
             client=client,
             embedding=embedding,
             table_name=table_name,
             query_name=query_name,
+            chunk_size=chunk_size,
         )
 
     def add_vectors(
@@ -159,7 +166,9 @@ class SupabaseVectorStore(VectorStore):
         documents: List[Document],
         ids: List[str],
     ) -> List[str]:
-        return self._add_vectors(self._client, self.table_name, vectors, documents, ids)
+        return self._add_vectors(
+            self._client, self.table_name, vectors, documents, ids, self.chunk_size
+        )
 
     def similarity_search(
         self,
@@ -300,6 +309,7 @@ class SupabaseVectorStore(VectorStore):
         vectors: List[List[float]],
         documents: List[Document],
         ids: List[str],
+        chunk_size: int,
     ) -> List[str]:
         """Add vectors to Supabase table."""
 
@@ -313,9 +323,6 @@ class SupabaseVectorStore(VectorStore):
             for idx, embedding in enumerate(vectors)
         ]
 
-        # According to the SupabaseVectorStore JS implementation, the best chunk size
-        # is 500
-        chunk_size = 500
         id_list: List[str] = []
         for i in range(0, len(rows), chunk_size):
             chunk = rows[i : i + chunk_size]
diff --git a/libs/langchain/tests/integration_tests/examples/hello_world.py b/libs/langchain/tests/integration_tests/examples/hello_world.py
index 53040a6c67..3f0294febb 100644
--- a/libs/langchain/tests/integration_tests/examples/hello_world.py
+++ b/libs/langchain/tests/integration_tests/examples/hello_world.py
@@ -3,7 +3,7 @@
 import sys
 
 
-def main():
+def main() -> int:
     print("Hello World!")
 
     return 0