From b0f21e2b50a46f74c0847f8efefa5cb7bada5a6b Mon Sep 17 00:00:00 2001 From: Naveen Tatikonda Date: Sat, 23 Sep 2023 18:12:51 -0500 Subject: [PATCH] [OpenSearch] Pass ids using from_texts and indexname in add_texts and search (#10969) ### Description This PR makes the following changes to OpenSearch: 1. Pass optional ids with `from_texts` 2. Pass an optional index name with `add_texts` and `search` instead of using the same index name that was used during `from_texts` ### Issue https://github.com/langchain-ai/langchain/issues/10967 ### Maintainers @rlancemartin, @eyurtsev, @navneet1v Signed-off-by: Naveen Tatikonda --- .../langchain/vectorstores/opensearch_vector_search.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/opensearch_vector_search.py b/libs/langchain/langchain/vectorstores/opensearch_vector_search.py index 6de74de297..bb63c2f806 100644 --- a/libs/langchain/langchain/vectorstores/opensearch_vector_search.py +++ b/libs/langchain/langchain/vectorstores/opensearch_vector_search.py @@ -374,6 +374,7 @@ class OpenSearchVectorSearch(VectorStore): """ embeddings = self.embedding_function.embed_documents(list(texts)) _validate_embeddings_and_bulk_size(len(embeddings), bulk_size) + index_name = _get_kwargs_value(kwargs, "index_name", self.index_name) text_field = _get_kwargs_value(kwargs, "text_field", "text") dim = len(embeddings[0]) engine = _get_kwargs_value(kwargs, "engine", "nmslib") @@ -392,7 +393,7 @@ class OpenSearchVectorSearch(VectorStore): return _bulk_ingest_embeddings( self.client, - self.index_name, + index_name, embeddings, texts, metadatas=metadatas, @@ -526,6 +527,7 @@ class OpenSearchVectorSearch(VectorStore): embedding = self.embedding_function.embed_query(query) search_type = _get_kwargs_value(kwargs, "search_type", "approximate_search") vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field") + index_name = _get_kwargs_value(kwargs, "index_name", self.index_name) if ( self.is_aoss @@ -601,7 +603,7 @@ class OpenSearchVectorSearch(VectorStore): else: raise ValueError("Invalid `search_type` provided as an argument") - response = self.client.search(index=self.index_name, body=search_query) + response = self.client.search(index=index_name, body=search_query) return [hit for hit in response["hits"]["hits"]] @@ -663,6 +665,7 @@ class OpenSearchVectorSearch(VectorStore): embedding: Embeddings, metadatas: Optional[List[dict]] = None, bulk_size: int = 500, + ids: Optional[List[str]] = None, **kwargs: Any, ) -> OpenSearchVectorSearch: """Construct OpenSearchVectorSearch wrapper from raw documents. @@ -772,6 +775,7 @@ class OpenSearchVectorSearch(VectorStore): embeddings, texts, metadatas=metadatas, + ids=ids, vector_field=vector_field, text_field=text_field, mapping=mapping,