Weaviate: Batch embed texts (#5903)

When a custom Embeddings object is set, embed all given texts in a single batched `embed_documents` call instead of embedding each text individually. Code calling `add_texts` can then size the chunks of texts it passes in to take full advantage of the hardware the embedding model runs on.
2024-11-08 07:10:35 +00:00 · 2023-07-13 19:57:58 -05:00 · 2023-07-13 19:57:58 -05:00 · 3874bb256e
commit 3874bb256e
parent 574698a5fb
1 changed file with 7 additions and 5 deletions
--- a/langchain/vectorstores/weaviate.py
+++ b/langchain/vectorstores/weaviate.py
@@ -135,6 +135,12 @@ class Weaviate(VectorStore):
        from weaviate.util import get_valid_uuid
        ids = []
+        embeddings: Optional[List[List[float]]] = None
+        if self._embedding:
+            if not isinstance(texts, list):
+                texts = list(texts)
+            embeddings = self._embedding.embed_documents(texts)
        with self._client.batch as batch:
            for i, text in enumerate(texts):
                data_properties = {self._text_key: text}
@@ -152,15 +158,11 @@ class Weaviate(VectorStore):
                elif "ids" in kwargs:
                    _id = kwargs["ids"][i]
-                if self._embedding is not None:
-                    vector = self._embedding.embed_documents([text])[0]
-                else:
-                    vector = None
                batch.add_data_object(
                    data_object=data_properties,
                    class_name=self._index_name,
                    uuid=_id,
-                    vector=vector,
+                    vector=embeddings[i] if embeddings else None,
                )
                ids.append(_id)
        return ids