[OpenSearch] Pass ids via from_texts and index_name via add_texts and search (#10969)

### Description
This PR makes the following changes to OpenSearch:
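1. Accept an optional `ids` argument in `from_texts` and pass it through when ingesting the texts
2. Accept an optional `index_name` keyword argument in `add_texts` and `search`; when it is
omitted, they fall back to the index name that was used during `from_texts` (`self.index_name`)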

### Issue
https://github.com/langchain-ai/langchain/issues/10967

### Maintainers
@rlancemartin, @eyurtsev, @navneet1v

Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
This commit is contained in:
Naveen Tatikonda 2023-09-23 18:12:51 -05:00 committed by GitHub
parent f945426874
commit b0f21e2b50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -374,6 +374,7 @@ class OpenSearchVectorSearch(VectorStore):
""" """
embeddings = self.embedding_function.embed_documents(list(texts)) embeddings = self.embedding_function.embed_documents(list(texts))
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size) _validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
index_name = _get_kwargs_value(kwargs, "index_name", self.index_name)
text_field = _get_kwargs_value(kwargs, "text_field", "text") text_field = _get_kwargs_value(kwargs, "text_field", "text")
dim = len(embeddings[0]) dim = len(embeddings[0])
engine = _get_kwargs_value(kwargs, "engine", "nmslib") engine = _get_kwargs_value(kwargs, "engine", "nmslib")
@ -392,7 +393,7 @@ class OpenSearchVectorSearch(VectorStore):
return _bulk_ingest_embeddings( return _bulk_ingest_embeddings(
self.client, self.client,
self.index_name, index_name,
embeddings, embeddings,
texts, texts,
metadatas=metadatas, metadatas=metadatas,
@ -526,6 +527,7 @@ class OpenSearchVectorSearch(VectorStore):
embedding = self.embedding_function.embed_query(query) embedding = self.embedding_function.embed_query(query)
search_type = _get_kwargs_value(kwargs, "search_type", "approximate_search") search_type = _get_kwargs_value(kwargs, "search_type", "approximate_search")
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field") vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
index_name = _get_kwargs_value(kwargs, "index_name", self.index_name)
if ( if (
self.is_aoss self.is_aoss
@ -601,7 +603,7 @@ class OpenSearchVectorSearch(VectorStore):
else: else:
raise ValueError("Invalid `search_type` provided as an argument") raise ValueError("Invalid `search_type` provided as an argument")
response = self.client.search(index=self.index_name, body=search_query) response = self.client.search(index=index_name, body=search_query)
return [hit for hit in response["hits"]["hits"]] return [hit for hit in response["hits"]["hits"]]
@ -663,6 +665,7 @@ class OpenSearchVectorSearch(VectorStore):
embedding: Embeddings, embedding: Embeddings,
metadatas: Optional[List[dict]] = None, metadatas: Optional[List[dict]] = None,
bulk_size: int = 500, bulk_size: int = 500,
ids: Optional[List[str]] = None,
**kwargs: Any, **kwargs: Any,
) -> OpenSearchVectorSearch: ) -> OpenSearchVectorSearch:
"""Construct OpenSearchVectorSearch wrapper from raw documents. """Construct OpenSearchVectorSearch wrapper from raw documents.
@ -772,6 +775,7 @@ class OpenSearchVectorSearch(VectorStore):
embeddings, embeddings,
texts, texts,
metadatas=metadatas, metadatas=metadatas,
ids=ids,
vector_field=vector_field, vector_field=vector_field,
text_field=text_field, text_field=text_field,
mapping=mapping, mapping=mapping,