mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Weaviate: Batch embed texts (#5903)
When a custom Embeddings object is set, embed all given texts in a single batched `embed_documents` call instead of embedding them one at a time. Any code calling `add_texts` can then size the chunks of texts it passes in appropriately, to take full advantage of the hardware it's running on.
This commit is contained in:
parent
574698a5fb
commit
3874bb256e
@ -135,6 +135,12 @@ class Weaviate(VectorStore):
|
|||||||
from weaviate.util import get_valid_uuid
|
from weaviate.util import get_valid_uuid
|
||||||
|
|
||||||
ids = []
|
ids = []
|
||||||
|
embeddings: Optional[List[List[float]]] = None
|
||||||
|
if self._embedding:
|
||||||
|
if not isinstance(texts, list):
|
||||||
|
texts = list(texts)
|
||||||
|
embeddings = self._embedding.embed_documents(texts)
|
||||||
|
|
||||||
with self._client.batch as batch:
|
with self._client.batch as batch:
|
||||||
for i, text in enumerate(texts):
|
for i, text in enumerate(texts):
|
||||||
data_properties = {self._text_key: text}
|
data_properties = {self._text_key: text}
|
||||||
@ -152,15 +158,11 @@ class Weaviate(VectorStore):
|
|||||||
elif "ids" in kwargs:
|
elif "ids" in kwargs:
|
||||||
_id = kwargs["ids"][i]
|
_id = kwargs["ids"][i]
|
||||||
|
|
||||||
if self._embedding is not None:
|
|
||||||
vector = self._embedding.embed_documents([text])[0]
|
|
||||||
else:
|
|
||||||
vector = None
|
|
||||||
batch.add_data_object(
|
batch.add_data_object(
|
||||||
data_object=data_properties,
|
data_object=data_properties,
|
||||||
class_name=self._index_name,
|
class_name=self._index_name,
|
||||||
uuid=_id,
|
uuid=_id,
|
||||||
vector=vector,
|
vector=embeddings[i] if embeddings else None,
|
||||||
)
|
)
|
||||||
ids.append(_id)
|
ids.append(_id)
|
||||||
return ids
|
return ids
|
||||||
|
Loading…
Reference in New Issue
Block a user