@ -866,9 +866,10 @@ class ElasticsearchStore(VectorStore):
)
)
self . client . indices . create ( index = index_name , * * indexSettings )
self . client . indices . create ( index = index_name , * * indexSettings )
def add_texts (
def __ add(
self ,
self ,
texts : Iterable [ str ] ,
texts : Iterable [ str ] ,
embeddings : Optional [ List [ List [ float ] ] ] ,
metadatas : Optional [ List [ Dict [ Any , Any ] ] ] = None ,
metadatas : Optional [ List [ Dict [ Any , Any ] ] ] = None ,
ids : Optional [ List [ str ] ] = None ,
ids : Optional [ List [ str ] ] = None ,
refresh_indices : bool = True ,
refresh_indices : bool = True ,
@ -876,23 +877,6 @@ class ElasticsearchStore(VectorStore):
bulk_kwargs : Optional [ Dict ] = None ,
bulk_kwargs : Optional [ Dict ] = None ,
* * kwargs : Any ,
* * kwargs : Any ,
) - > List [ str ] :
) - > List [ str ] :
""" Run more texts through the embeddings and add to the vectorstore.
Args :
texts : Iterable of strings to add to the vectorstore .
metadatas : Optional list of metadatas associated with the texts .
ids : Optional list of ids to associate with the texts .
refresh_indices : Whether to refresh the Elasticsearch indices
after adding the texts .
create_index_if_not_exists : Whether to create the Elasticsearch
index if it doesn ' t already exist.
* bulk_kwargs : Additional arguments to pass to Elasticsearch bulk .
- chunk_size : Optional . Number of texts to add to the
index at a time . Defaults to 500.
Returns :
List of ids from adding the texts into the vectorstore .
"""
try :
try :
from elasticsearch . helpers import BulkIndexError , bulk
from elasticsearch . helpers import BulkIndexError , bulk
except ImportError :
except ImportError :
@ -901,53 +885,33 @@ class ElasticsearchStore(VectorStore):
" Please install it with `pip install elasticsearch`. "
" Please install it with `pip install elasticsearch`. "
)
)
bulk_kwargs = bulk_kwargs or { }
bulk_kwargs = bulk_kwargs or { }
embeddings = [ ]
ids = ids or [ str ( uuid . uuid4 ( ) ) for _ in texts ]
ids = ids or [ str ( uuid . uuid4 ( ) ) for _ in texts ]
requests = [ ]
requests = [ ]
if self . embedding is not None :
if create_index_if_not_exists :
# If no search_type requires inference, we use the provided
if embeddings :
# embedding function to embed the texts.
dims_length = len ( embeddings [ 0 ] )
embeddings = self . embedding . embed_documents ( list ( texts ) )
else :
dims_length = len ( embeddings [ 0 ] )
dims_length = None
if create_index_if_not_exists :
self . _create_index_if_not_exists (
index_name = self . index_name , dims_length = dims_length
)
for i , ( text , vector ) in enumerate ( zip ( texts , embeddings ) ) :
metadata = metadatas [ i ] if metadatas else { }
requests . append (
self . _create_index_if_not_exists (
{
index_name = self . index_name , dims_length = dims_length
" _op_type " : " index " ,
)
" _index " : self . index_name ,
self . query_field : text ,
self . vector_query_field : vector ,
" metadata " : metadata ,
" _id " : ids [ i ] ,
}
)
else :
for i , text in enumerate ( texts ) :
# the search_type doesn't require inference, so we don't need to
metadata = metadatas [ i ] if metadatas else { }
# embed the texts.
if create_index_if_not_exists :
self . _create_index_if_not_exists ( index_name = self . index_name )
for i , text in enumerate ( texts ) :
request = {
metadata = metadatas [ i ] if metadatas else { }
" _op_type " : " index " ,
" _index " : self . index_name ,
self . query_field : text ,
" metadata " : metadata ,
" _id " : ids [ i ] ,
}
if embeddings :
request [ self . vector_query_field ] = embeddings [ i ]
requests . append (
requests . append ( request )
{
" _op_type " : " index " ,
" _index " : self . index_name ,
self . query_field : text ,
" metadata " : metadata ,
" _id " : ids [ i ] ,
}
)
if len ( requests ) > 0 :
if len ( requests ) > 0 :
try :
try :
@ -974,6 +938,93 @@ class ElasticsearchStore(VectorStore):
logger . debug ( " No texts to add to index " )
logger . debug ( " No texts to add to index " )
return [ ]
return [ ]
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[Dict[Any, Any]]] = None,
    ids: Optional[List[str]] = None,
    refresh_indices: bool = True,
    create_index_if_not_exists: bool = True,
    bulk_kwargs: Optional[Dict] = None,
    **kwargs: Any,
) -> List[str]:
    """Run more texts through the embeddings and add to the vectorstore.

    Args:
        texts: Iterable of strings to add to the vectorstore. It is
            materialized into a list once, so one-shot iterables
            (generators, iterators) are safe to pass.
        metadatas: Optional list of metadatas associated with the texts.
        ids: Optional list of ids to associate with the texts.
        refresh_indices: Whether to refresh the Elasticsearch indices
            after adding the texts.
        create_index_if_not_exists: Whether to create the Elasticsearch
            index if it doesn't already exist.
        bulk_kwargs: Additional arguments to pass to Elasticsearch bulk.
            - chunk_size: Optional. Number of texts to add to the
                index at a time. Defaults to 500.
        **kwargs: Extra keyword arguments, handed on to the internal
            ``__add`` helper.

    Returns:
        List of ids from adding the texts into the vectorstore.
    """
    # ``texts`` is consumed twice: once here for embedding and again by
    # ``__add`` when it builds the bulk requests. Materialize it up front,
    # otherwise a generator would be exhausted by the embedding step and
    # nothing would be indexed.
    texts = list(texts)

    if self.embedding is not None:
        # If no search_type requires inference, we use the provided
        # embedding function to embed the texts.
        embeddings = self.embedding.embed_documents(texts)
    else:
        # The search_type doesn't require inference, so we don't need to
        # embed the texts.
        embeddings = None

    # NOTE(review): the extras are forwarded as a single ``kwargs`` keyword
    # (not unpacked), so ``__add`` receives them nested under the key
    # "kwargs". Kept as-is to preserve existing call behavior.
    return self.__add(
        texts,
        embeddings,
        metadatas=metadatas,
        ids=ids,
        refresh_indices=refresh_indices,
        create_index_if_not_exists=create_index_if_not_exists,
        bulk_kwargs=bulk_kwargs,
        kwargs=kwargs,
    )
def add_embeddings(
    self,
    text_embeddings: Iterable[Tuple[str, List[float]]],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    refresh_indices: bool = True,
    create_index_if_not_exists: bool = True,
    bulk_kwargs: Optional[Dict] = None,
    **kwargs: Any,
) -> List[str]:
    """Add the given texts and embeddings to the vectorstore.

    Args:
        text_embeddings: Iterable pairs of string and embedding to
            add to the vectorstore. May be empty, in which case empty
            text and embedding lists are handed to ``__add``.
        metadatas: Optional list of metadatas associated with the texts.
        ids: Optional list of unique IDs.
        refresh_indices: Whether to refresh the Elasticsearch indices
            after adding the texts.
        create_index_if_not_exists: Whether to create the Elasticsearch
            index if it doesn't already exist.
        bulk_kwargs: Additional arguments to pass to Elasticsearch bulk.
            - chunk_size: Optional. Number of texts to add to the
                index at a time. Defaults to 500.
        **kwargs: Extra keyword arguments, handed on to the internal
            ``__add`` helper.

    Returns:
        List of ids from adding the texts into the vectorstore.
    """
    # Materialize first so emptiness can be checked even for one-shot
    # iterables such as generators.
    pairs = list(text_embeddings)
    if pairs:
        # Transpose [(text, vector), ...] into parallel lists.
        texts, embeddings = (list(column) for column in zip(*pairs))
    else:
        # ``zip(*[])`` yields nothing, so unpacking it into two names would
        # raise ValueError; fall back to empty lists instead.
        texts, embeddings = [], []

    # NOTE(review): the extras are forwarded as a single ``kwargs`` keyword
    # (not unpacked), so ``__add`` receives them nested under the key
    # "kwargs". Kept as-is to preserve existing call behavior.
    return self.__add(
        texts,
        embeddings,
        metadatas=metadatas,
        ids=ids,
        refresh_indices=refresh_indices,
        create_index_if_not_exists=create_index_if_not_exists,
        bulk_kwargs=bulk_kwargs,
        kwargs=kwargs,
    )
@classmethod
@classmethod
def from_texts (
def from_texts (
cls ,
cls ,