From 3874bb256e09d377032ae54b1592ca3dd7cf9e4d Mon Sep 17 00:00:00 2001
From: Ben Perry
Date: Thu, 13 Jul 2023 19:57:58 -0500
Subject: [PATCH] Weaviate: Batch embed texts (#5903)

When a custom Embeddings object is set, embed all given texts in a batch
instead of passing them through individually. Any code calling add_texts
can then appropriately size the chunks of texts that are passed through
to take full advantage of the hardware it's running on.
---
 langchain/vectorstores/weaviate.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py
index 16d5bc1573..5bee40b4e6 100644
--- a/langchain/vectorstores/weaviate.py
+++ b/langchain/vectorstores/weaviate.py
@@ -135,6 +135,12 @@ class Weaviate(VectorStore):
         from weaviate.util import get_valid_uuid
 
         ids = []
+        embeddings: Optional[List[List[float]]] = None
+        if self._embedding:
+            if not isinstance(texts, list):
+                texts = list(texts)
+            embeddings = self._embedding.embed_documents(texts)
+
         with self._client.batch as batch:
             for i, text in enumerate(texts):
                 data_properties = {self._text_key: text}
@@ -152,15 +158,11 @@ class Weaviate(VectorStore):
                 elif "ids" in kwargs:
                     _id = kwargs["ids"][i]
 
-                if self._embedding is not None:
-                    vector = self._embedding.embed_documents([text])[0]
-                else:
-                    vector = None
                 batch.add_data_object(
                     data_object=data_properties,
                     class_name=self._index_name,
                     uuid=_id,
-                    vector=vector,
+                    vector=embeddings[i] if embeddings else None,
                 )
                 ids.append(_id)
         return ids