mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Weaviate: Batch embed texts (#5903)
When a custom Embeddings object is set, embed all given texts in a single batched `embed_documents` call instead of embedding each text individually. Any code calling `add_texts` can then appropriately size the chunks of texts it passes in to take full advantage of the hardware it is running on.
This commit is contained in:
parent
574698a5fb
commit
3874bb256e
@ -135,6 +135,12 @@ class Weaviate(VectorStore):
|
||||
from weaviate.util import get_valid_uuid
|
||||
|
||||
ids = []
|
||||
embeddings: Optional[List[List[float]]] = None
|
||||
if self._embedding:
|
||||
if not isinstance(texts, list):
|
||||
texts = list(texts)
|
||||
embeddings = self._embedding.embed_documents(texts)
|
||||
|
||||
with self._client.batch as batch:
|
||||
for i, text in enumerate(texts):
|
||||
data_properties = {self._text_key: text}
|
||||
@ -152,15 +158,11 @@ class Weaviate(VectorStore):
|
||||
elif "ids" in kwargs:
|
||||
_id = kwargs["ids"][i]
|
||||
|
||||
if self._embedding is not None:
|
||||
vector = self._embedding.embed_documents([text])[0]
|
||||
else:
|
||||
vector = None
|
||||
batch.add_data_object(
|
||||
data_object=data_properties,
|
||||
class_name=self._index_name,
|
||||
uuid=_id,
|
||||
vector=vector,
|
||||
vector=embeddings[i] if embeddings else None,
|
||||
)
|
||||
ids.append(_id)
|
||||
return ids
|
||||
|
Loading…
Reference in New Issue
Block a user