From c53fab63a3fd480449dbfac8317a9dd06f8ce4d6 Mon Sep 17 00:00:00 2001 From: mogith-pn <143642606+mogith-pn@users.noreply.github.com> Date: Wed, 20 Dec 2023 12:51:36 +0530 Subject: [PATCH] community[patch]: Fixed duplicate input id issue in clarifai vectorstore (#14914) - **Description:** This PR fixes the duplicate input ID issue in the Clarifai vectorstore class that occurs when ingesting more documents into the vectorstore than the batch size. --------- Co-authored-by: Bagatur --- .../langchain_community/vectorstores/clarifai.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/clarifai.py b/libs/community/langchain_community/vectorstores/clarifai.py index 4bded7652b..d5c63232b7 100644 --- a/libs/community/langchain_community/vectorstores/clarifai.py +++ b/libs/community/langchain_community/vectorstores/clarifai.py @@ -116,21 +116,23 @@ class Clarifai(VectorStore): batch_metadatas = ( metadatas[idx : idx + batch_size] if metadatas else None ) + if ids is None: + batch_ids = [uuid.uuid4().hex for _ in range(len(batch_texts))] + else: + batch_ids = ids[idx : idx + batch_size] if batch_metadatas is not None: meta_list = [] for meta in batch_metadatas: meta_struct = Struct() meta_struct.update(meta) meta_list.append(meta_struct) - if ids is None: - ids = [uuid.uuid4().hex for _ in range(len(batch_texts))] input_batch = [ input_obj.get_text_input( - input_id=ids[id], - raw_text=inp, - metadata=meta_list[id] if batch_metadatas else None, + input_id=batch_ids[i], + raw_text=text, + metadata=meta_list[i] if batch_metadatas else None, ) - for id, inp in enumerate(batch_texts) + for i, text in enumerate(batch_texts) ] result_id = input_obj.upload_inputs(inputs=input_batch) input_job_ids.extend(result_id)