|
|
@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore):
|
|
|
|
except ImportError:
|
|
|
|
except ImportError:
|
|
|
|
raise ValueError(
|
|
|
|
raise ValueError(
|
|
|
|
"Could not import elasticsearch python package. "
|
|
|
|
"Could not import elasticsearch python package. "
|
|
|
|
"Please install it with `pip install elasticearch`."
|
|
|
|
"Please install it with `pip install elasticsearch`."
|
|
|
|
)
|
|
|
|
)
|
|
|
|
self.embedding_function = embedding_function
|
|
|
|
self.embedding_function = embedding_function
|
|
|
|
self.index_name = index_name
|
|
|
|
self.index_name = index_name
|
|
|
@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore):
|
|
|
|
|
|
|
|
|
|
|
|
def add_texts(
    self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
) -> List[str]:
    """Run more texts through the embeddings and add to the vectorstore.

    Each text is embedded with ``self.embedding_function`` and indexed into
    ``self.index_name`` under a freshly generated UUID4 document id.

    Args:
        texts: Iterable of strings to add to the vectorstore.
        metadatas: Optional list of metadatas associated with the texts.
            When given, entry ``i`` is stored alongside text ``i``.

    Returns:
        List of ids from adding the texts into the vectorstore, in the same
        order as ``texts``. Empty when ``texts`` yields nothing.

    Raises:
        ValueError: If the ``elasticsearch`` package cannot be imported.
    """
    try:
        from elasticsearch.helpers import bulk
    except ImportError as err:
        # Keep ValueError for callers that already catch it, but chain the
        # original ImportError so the root cause stays visible.
        raise ValueError(
            "Could not import elasticsearch python package. "
            "Please install it with `pip install elasticsearch`."
        ) from err

    requests = []
    ids = []
    for i, text in enumerate(texts):
        metadata = metadatas[i] if metadatas else {}
        _id = str(uuid.uuid4())
        requests.append(
            {
                "_op_type": "index",
                "_index": self.index_name,
                "vector": self.embedding_function(text),
                "text": text,
                "metadata": metadata,
                "_id": _id,
            }
        )
        ids.append(_id)

    if not requests:
        # Nothing to index: skip the bulk call and the index refresh.
        return ids

    bulk(self.client, requests)
    # TODO: add option not to refresh
    self.client.indices.refresh(index=self.index_name)
    return ids
|
|
|
|
|
|
|
|
|
|
|
|
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
|
|
|
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
"""Return docs most similar to query.
|
|
|
|