Fix SupabaseVectorStore write operation timeout (#12318)

**Description**
This small change will make chunk_size a configurable parameter for
loading documents into a Supabase database.

**Issue**
https://github.com/langchain-ai/langchain/issues/11422

**Dependencies**
No changes

**Twitter**
@j1philli

**Reminder**
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.

---------

Co-authored-by: Greg Richardson <greg.nmr@gmail.com>
pull/12376/head
Josh Phillips 9 months ago committed by GitHub
parent b10cefb160
commit 01c5cd365b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -197,7 +197,7 @@
"id": "5abb9b93",
"metadata": {},
"source": [
"Insert the above documents into the database. Embeddings will automatically be generated for each document."
"Insert the above documents into the database. Embeddings will automatically be generated for each document. You can adjust the chunk_size based on the amount of documents you have. The default is 500 but lowering it may be necessary."
]
},
{
@ -208,7 +208,7 @@
"outputs": [],
"source": [
"\n",
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\", chunk_size=500)"
]
},
{

@ -61,6 +61,7 @@ class SupabaseVectorStore(VectorStore):
client=supabase_client,
table_name="documents",
query_name="match_documents",
chunk_size=500,
)
To load from an existing table:
@ -88,6 +89,7 @@ class SupabaseVectorStore(VectorStore):
client: supabase.client.Client,
embedding: Embeddings,
table_name: str,
chunk_size: int = 500,
query_name: Union[str, None] = None,
) -> None:
"""Initialize with supabase client."""
@ -103,6 +105,9 @@ class SupabaseVectorStore(VectorStore):
self._embedding: Embeddings = embedding
self.table_name = table_name or "documents"
self.query_name = query_name or "match_documents"
self.chunk_size = chunk_size or 500
# According to the SupabaseVectorStore JS implementation, the best chunk size
# is 500. Though for large datasets it can be too large so it is configurable.
@property
def embeddings(self) -> Embeddings:
@ -130,6 +135,7 @@ class SupabaseVectorStore(VectorStore):
client: Optional[supabase.client.Client] = None,
table_name: Optional[str] = "documents",
query_name: Union[str, None] = "match_documents",
chunk_size: int = 500,
ids: Optional[List[str]] = None,
**kwargs: Any,
) -> "SupabaseVectorStore":
@ -144,13 +150,14 @@ class SupabaseVectorStore(VectorStore):
embeddings = embedding.embed_documents(texts)
ids = [str(uuid.uuid4()) for _ in texts]
docs = cls._texts_to_documents(texts, metadatas)
cls._add_vectors(client, table_name, embeddings, docs, ids)
cls._add_vectors(client, table_name, embeddings, docs, ids, chunk_size)
return cls(
client=client,
embedding=embedding,
table_name=table_name,
query_name=query_name,
chunk_size=chunk_size,
)
def add_vectors(
@ -159,7 +166,9 @@ class SupabaseVectorStore(VectorStore):
documents: List[Document],
ids: List[str],
) -> List[str]:
return self._add_vectors(self._client, self.table_name, vectors, documents, ids)
return self._add_vectors(
self._client, self.table_name, vectors, documents, ids, self.chunk_size
)
def similarity_search(
self,
@ -300,6 +309,7 @@ class SupabaseVectorStore(VectorStore):
vectors: List[List[float]],
documents: List[Document],
ids: List[str],
chunk_size: int,
) -> List[str]:
"""Add vectors to Supabase table."""
@ -313,9 +323,6 @@ class SupabaseVectorStore(VectorStore):
for idx, embedding in enumerate(vectors)
]
# According to the SupabaseVectorStore JS implementation, the best chunk size
# is 500
chunk_size = 500
id_list: List[str] = []
for i in range(0, len(rows), chunk_size):
chunk = rows[i : i + chunk_size]

@ -3,7 +3,7 @@
import sys
def main():
def main() -> int:
print("Hello World!")
return 0

Loading…
Cancel
Save