mirror of
https://github.com/hwchase17/langchain
synced 2024-11-16 06:13:16 +00:00
community[patch]: Fixed duplicate input id issue in clarifai vectorstore (#14914)
- **Description:** This PR fixes the duplicate input ID issue in the Clarifai vectorstore class, which occurred when ingesting more documents into the vectorstore than the batch size. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
5642132c0c
commit
c53fab63a3
@ -116,21 +116,23 @@ class Clarifai(VectorStore):
|
||||
batch_metadatas = (
|
||||
metadatas[idx : idx + batch_size] if metadatas else None
|
||||
)
|
||||
if ids is None:
|
||||
batch_ids = [uuid.uuid4().hex for _ in range(len(batch_texts))]
|
||||
else:
|
||||
batch_ids = ids[idx : idx + batch_size]
|
||||
if batch_metadatas is not None:
|
||||
meta_list = []
|
||||
for meta in batch_metadatas:
|
||||
meta_struct = Struct()
|
||||
meta_struct.update(meta)
|
||||
meta_list.append(meta_struct)
|
||||
if ids is None:
|
||||
ids = [uuid.uuid4().hex for _ in range(len(batch_texts))]
|
||||
input_batch = [
|
||||
input_obj.get_text_input(
|
||||
input_id=ids[id],
|
||||
raw_text=inp,
|
||||
metadata=meta_list[id] if batch_metadatas else None,
|
||||
input_id=batch_ids[i],
|
||||
raw_text=text,
|
||||
metadata=meta_list[i] if batch_metadatas else None,
|
||||
)
|
||||
for id, inp in enumerate(batch_texts)
|
||||
for i, text in enumerate(batch_texts)
|
||||
]
|
||||
result_id = input_obj.upload_inputs(inputs=input_batch)
|
||||
input_job_ids.extend(result_id)
|
||||
|
Loading…
Reference in New Issue
Block a user