diff --git a/libs/langchain/langchain/vectorstores/matching_engine.py b/libs/langchain/langchain/vectorstores/matching_engine.py
index 2c725969fe..cd0a26d9d6 100644
--- a/libs/langchain/langchain/vectorstores/matching_engine.py
+++ b/libs/langchain/langchain/vectorstores/matching_engine.py
@@ -116,15 +116,26 @@ class MatchingEngine(VectorStore):
         Returns:
             List of ids from adding the texts into the vectorstore.
         """
+        # Materialize up front in case `texts` is a one-shot iterable: it is
+        # used by the length check, the embedding call and the loop below.
+        texts = list(texts)
+        if metadatas is not None and len(texts) != len(metadatas):
+            raise ValueError(
+                "texts and metadatas do not have the same length. Received "
+                f"{len(texts)} texts and {len(metadatas)} metadatas."
+            )
         logger.debug("Embedding documents.")
-        embeddings = self.embedding.embed_documents(list(texts))
+        embeddings = self.embedding.embed_documents(texts)
         jsons = []
         ids = []
         # Could be improved with async.
-        for embedding, text in zip(embeddings, texts):
+        for idx, (embedding, text) in enumerate(zip(embeddings, texts)):
             id = str(uuid.uuid4())
             ids.append(id)
-            jsons.append({"id": id, "embedding": embedding})
+            json_: dict = {"id": id, "embedding": embedding}
+            if metadatas is not None:
+                json_["metadata"] = metadatas[idx]
+            jsons.append(json_)
             self._upload_to_gcs(text, f"documents/{id}")
 
         logger.debug(f"Uploaded {len(ids)} documents to GCS.")