From ff4a25b841f1cca6f4a973067d39dcc5ec2dcf81 Mon Sep 17 00:00:00 2001 From: Kushal Chordiya <88190553+kushal-ti@users.noreply.github.com> Date: Wed, 22 Mar 2023 23:57:32 +0530 Subject: [PATCH] Fix minor bug in opensearch vector store add_texts function (#1878) In langchain.vectorstores.opensearch_vector_search.py, in the add_texts function, around line 247, we have the following code: ```python embeddings = [ self.embedding_function.embed_documents(list(text))[0] for text in texts ] ``` The goal of the `list(text)` part, I believe, is to pass a list to the embed_documents method instead of a str. However, `list(text)` is a subtle bug: it converts the string text into a list in which each element is a single character of the string. Screenshot 2023-03-22 at 1 27 18 PM The correct fix is to change the code to: ```python embeddings = [ self.embedding_function.embed_documents([text])[0] for text in texts ] ``` which wraps the string inside a list. --- langchain/vectorstores/opensearch_vector_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/vectorstores/opensearch_vector_search.py b/langchain/vectorstores/opensearch_vector_search.py index 9218ffaa..06c855eb 100644 --- a/langchain/vectorstores/opensearch_vector_search.py +++ b/langchain/vectorstores/opensearch_vector_search.py @@ -243,7 +243,7 @@ class OpenSearchVectorSearch(VectorStore): List of ids from adding the texts into the vectorstore. """ embeddings = [ - self.embedding_function.embed_documents(list(text))[0] for text in texts + self.embedding_function.embed_documents([text])[0] for text in texts ] _validate_embeddings_and_bulk_size(len(embeddings), bulk_size) return _bulk_ingest_embeddings(