mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Adapt to the latest version of Alibaba Cloud OpenSearch vector store API (#11849)
Hello Folks, Alibaba Cloud OpenSearch has released a new version of the vector storage engine, which has significantly improved performance compared to the previous version. At the same time, the SDK has also changed, so the Alibaba Cloud OpenSearch vector store code needs to be adjusted to adapt. This PR includes: Adapt to the latest version of Alibaba Cloud OpenSearch API. More comprehensive unit testing. Improve documentation. I have read your contributing guidelines. And I have passed the tests below - [x] make format - [x] make lint - [x] make coverage - [x] make test --------- Co-authored-by: zhaoshengbo <shengbo.zsb@alibaba-inc.com>
This commit is contained in:
parent
96e3e06d50
commit
cb7e12f6ba
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
OpenSearch helps you develop high quality, maintenance-free, and high performance intelligent search services to provide your users with high search efficiency and accuracy.
|
OpenSearch helps you develop high quality, maintenance-free, and high performance intelligent search services to provide your users with high search efficiency and accuracy.
|
||||||
|
|
||||||
OpenSearch provides the vector search feature. In specific scenarios, especially test question search and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results. This topic describes the syntax and usage notes of vector indexes.
|
OpenSearch provides the vector search feature. In specific scenarios, especially in question retrieval and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results.
|
||||||
|
|
||||||
## Purchase an instance and configure it
|
## Purchase an instance and configure it
|
||||||
|
|
||||||
@ -21,6 +21,8 @@ supported functions:
|
|||||||
- `similarity_search_by_vector`
|
- `similarity_search_by_vector`
|
||||||
- `asimilarity_search_by_vector`
|
- `asimilarity_search_by_vector`
|
||||||
- `similarity_search_with_relevance_scores`
|
- `similarity_search_with_relevance_scores`
|
||||||
|
- `delete_documents_with_texts`
|
||||||
|
|
||||||
|
|
||||||
For a more detailed walk through of the Alibaba Cloud OpenSearch wrapper, see [this notebook](../modules/indexes/vectorstores/examples/alibabacloud_opensearch.ipynb)
|
For a more detailed walk through of the Alibaba Cloud OpenSearch wrapper, see [this notebook](../modules/indexes/vectorstores/examples/alibabacloud_opensearch.ipynb)
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip install alibabacloud-ha3engine"
|
"#!pip install alibabacloud_ha3engine_vector"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -150,37 +150,45 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"settings = AlibabaCloudOpenSearchSettings(\n",
|
"settings = AlibabaCloudOpenSearchSettings(\n",
|
||||||
" endpoint=\"The endpoint of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.\",\n",
|
" endpoint=\" The endpoint of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.\",\n",
|
||||||
" instance_id=\"The identify of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.\",\n",
|
" instance_id=\"The identify of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.\",\n",
|
||||||
" datasource_name=\"The name of the data source specified when creating it.\",\n",
|
" protocol=\"Communication Protocol between SDK and Server, default is http.\",\n",
|
||||||
" username=\"The username specified when purchasing the instance.\",\n",
|
" username=\"The username specified when purchasing the instance.\",\n",
|
||||||
" password=\"The password specified when purchasing the instance.\",\n",
|
" password=\"The password specified when purchasing the instance.\",\n",
|
||||||
" embedding_index_name=\"The name of the vector attribute specified when configuring the instance attributes.\",\n",
|
" namespace=\"The instance data will be partitioned based on the namespace field. If the namespace is enabled, you need to specify the namespace field name during initialization. Otherwise, the queries cannot be executed correctly.\",\n",
|
||||||
|
" table_name=\"The table name specified during instance configuration.\",\n",
|
||||||
|
" embedding_field_separator=\"Delimiter specified for writing vector field data, default is comma.\",\n",
|
||||||
|
" output_fields=\"Specify the field list returned when invoking OpenSearch, by default it is the value list of the field mapping field.\",\n",
|
||||||
" field_name_mapping={\n",
|
" field_name_mapping={\n",
|
||||||
" \"id\": \"id\", # The id field name mapping of index document.\n",
|
" \"id\": \"id\", # The id field name mapping of index document.\n",
|
||||||
" \"document\": \"document\", # The text field name mapping of index document.\n",
|
" \"document\": \"document\", # The text field name mapping of index document.\n",
|
||||||
" \"embedding\": \"embedding\", # The embedding field name mapping of index document.\n",
|
" \"embedding\": \"embedding\", # The embedding field name mapping of index document.\n",
|
||||||
" \"name_of_the_metadata_specified_during_search\": \"opensearch_metadata_field_name,=\", # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query.\n",
|
" \"name_of_the_metadata_specified_during_search\": \"opensearch_metadata_field_name,=\",\n",
|
||||||
|
" # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query,\n",
|
||||||
|
" # Currently supported logical operators are: > (greater than), < (less than), = (equal to), <= (less than or equal to), >= (greater than or equal to), != (not equal to).\n",
|
||||||
|
" # Refer to this link: https://help.aliyun.com/zh/open-search/vector-search-edition/filter-expression\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# for example\n",
|
"# for example\n",
|
||||||
|
"\n",
|
||||||
"# settings = AlibabaCloudOpenSearchSettings(\n",
|
"# settings = AlibabaCloudOpenSearchSettings(\n",
|
||||||
"# endpoint=\"ha-cn-5yd39d83c03.public.ha.aliyuncs.com\",\n",
|
"# endpoint='ha-cn-5yd3fhdm102.public.ha.aliyuncs.com',\n",
|
||||||
"# instance_id=\"ha-cn-5yd39d83c03\",\n",
|
"# instance_id='ha-cn-5yd3fhdm102',\n",
|
||||||
"# datasource_name=\"ha-cn-5yd39d83c03_test\",\n",
|
"# username='instance user name',\n",
|
||||||
"# username=\"this is a user name\",\n",
|
"# password='instance password',\n",
|
||||||
"# password=\"this is a password\",\n",
|
"# table_name='test_table',\n",
|
||||||
"# embedding_index_name=\"index_embedding\",\n",
|
|
||||||
"# field_name_mapping={\n",
|
"# field_name_mapping={\n",
|
||||||
"# \"id\": \"id\",\n",
|
"# \"id\": \"id\",\n",
|
||||||
"# \"document\": \"document\",\n",
|
"# \"document\": \"document\",\n",
|
||||||
"# \"embedding\": \"embedding\",\n",
|
"# \"embedding\": \"embedding\",\n",
|
||||||
"# \"metadata_a\": \"metadata_a,=\" #The value field contains mapping name and operator, the operator would be used when executing metadata filter query\n",
|
"# \"string_field\": \"string_filed,=\",\n",
|
||||||
"# \"metadata_b\": \"metadata_b,>\"\n",
|
"# \"int_field\": \"int_filed,=\",\n",
|
||||||
"# \"metadata_c\": \"metadata_c,<\"\n",
|
"# \"float_field\": \"float_field,=\",\n",
|
||||||
"# \"metadata_else\": \"metadata_else,=\"\n",
|
"# \"double_field\": \"double_field,=\"\n",
|
||||||
"# })"
|
"#\n",
|
||||||
|
"# },\n",
|
||||||
|
"# )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -256,7 +264,9 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"metadatas = {\"md_key_a\": \"md_val_a\", \"md_key_b\": \"md_val_b\"}\n",
|
"metadatas = [{'string_field': \"value1\", \"int_field\": 1, 'float_field': 1.0, 'double_field': 2.0},\n",
|
||||||
|
" {'string_field': \"value2\", \"int_field\": 2, 'float_field': 3.0, 'double_field': 4.0},\n",
|
||||||
|
" {'string_field': \"value3\", \"int_field\": 3, 'float_field': 5.0, 'double_field': 6.0}]\n",
|
||||||
"# the key of metadatas must match field_name_mapping in settings.\n",
|
"# the key of metadatas must match field_name_mapping in settings.\n",
|
||||||
"opensearch.add_texts(texts=docs, ids=[], metadatas=metadatas)"
|
"opensearch.add_texts(texts=docs, ids=[], metadatas=metadatas)"
|
||||||
]
|
]
|
||||||
@ -309,8 +319,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||||
"metadatas = {\"md_key_a\": \"md_val_a\"}\n",
|
"metadata = {'string_field': \"value1\", \"int_field\": 1, 'float_field': 1.0, 'double_field': 2.0}\n",
|
||||||
"docs = opensearch.similarity_search(query, filter=metadatas)\n",
|
"docs = opensearch.similarity_search(query, filter=metadata)\n",
|
||||||
"print(docs[0].page_content)"
|
"print(docs[0].page_content)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -12,61 +12,70 @@ logger = logging.getLogger()
|
|||||||
|
|
||||||
|
|
||||||
class AlibabaCloudOpenSearchSettings:
|
class AlibabaCloudOpenSearchSettings:
|
||||||
"""`Alibaba Cloud Opensearch` client configuration.
|
"""`Alibaba Cloud Opensearch` client configuration.
|
||||||
|
|
||||||
Attribute:
|
Attribute:
|
||||||
endpoint (str) : The endpoint of opensearch instance, You can find it
|
endpoint (str) : The endpoint of opensearch instance, You can find it
|
||||||
from the console of Alibaba Cloud OpenSearch.
|
from the console of Alibaba Cloud OpenSearch.
|
||||||
instance_id (str) : The identify of opensearch instance, You can find
|
instance_id (str) : The identify of opensearch instance, You can find
|
||||||
it from the console of Alibaba Cloud OpenSearch.
|
it from the console of Alibaba Cloud OpenSearch.
|
||||||
datasource_name (str): The name of the data source specified when creating it.
|
|
||||||
username (str) : The username specified when purchasing the instance.
|
username (str) : The username specified when purchasing the instance.
|
||||||
password (str) : The password specified when purchasing the instance.
|
password (str) : The password specified when purchasing the instance,
|
||||||
embedding_index_name (str) : The name of the vector attribute specified
|
After the instance is created, you can modify it on the console.
|
||||||
when configuring the instance attributes.
|
table_name (str): The table name specified during instance configuration.
|
||||||
field_name_mapping (Dict) : Using field name mapping between opensearch
|
field_name_mapping (Dict) : Using field name mapping between opensearch
|
||||||
vector store and opensearch instance configuration table field names:
|
vector store and opensearch instance configuration table field names:
|
||||||
{
|
{
|
||||||
'id': 'The id field name map of index document.',
|
'id': 'The id field name map of index document.',
|
||||||
'document': 'The text field name map of index document.',
|
'document': 'The text field name map of index document.',
|
||||||
'embedding': 'In the embedding field of the opensearch instance,
|
'embedding': 'In the embedding field of the opensearch instance,
|
||||||
the values must be in float16 multivalue type and separated by commas.',
|
the values must be in float type and separated by separator,
|
||||||
|
default is comma.',
|
||||||
'metadata_field_x': 'Metadata field mapping includes the mapped
|
'metadata_field_x': 'Metadata field mapping includes the mapped
|
||||||
field name and operator in the mapping value, separated by a comma
|
field name and operator in the mapping value, separated by a comma
|
||||||
between the mapped field name and the operator.',
|
between the mapped field name and the operator.',
|
||||||
}
|
}
|
||||||
|
protocol (str): Communication Protocol between SDK and Server, default is http.
|
||||||
|
namespace (str) : The instance data will be partitioned based on the "namespace"
|
||||||
|
field. If the namespace is enabled, you need to specify the namespace field
|
||||||
|
name during initialization. Otherwise, the queries cannot be executed
|
||||||
|
correctly.
|
||||||
|
embedding_field_separator(str): Delimiter specified for writing vector
|
||||||
|
field data, default is comma.
|
||||||
|
output_fields: Specify the field list returned when invoking OpenSearch,
|
||||||
|
by default it is the value list of the field mapping field.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
endpoint: str
|
|
||||||
instance_id: str
|
|
||||||
username: str
|
|
||||||
password: str
|
|
||||||
datasource_name: str
|
|
||||||
embedding_index_name: str
|
|
||||||
field_name_mapping: Dict[str, str] = {
|
|
||||||
"id": "id",
|
|
||||||
"document": "document",
|
|
||||||
"embedding": "embedding",
|
|
||||||
"metadata_field_x": "metadata_field_x,operator",
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
endpoint: str,
|
endpoint: str,
|
||||||
instance_id: str,
|
instance_id: str,
|
||||||
username: str,
|
username: str,
|
||||||
password: str,
|
password: str,
|
||||||
datasource_name: str,
|
table_name: str,
|
||||||
embedding_index_name: str,
|
|
||||||
field_name_mapping: Dict[str, str],
|
field_name_mapping: Dict[str, str],
|
||||||
|
protocol: str = "http",
|
||||||
|
namespace: str = "",
|
||||||
|
embedding_field_separator: str = ",",
|
||||||
|
output_fields: Optional[List[str]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.instance_id = instance_id
|
self.instance_id = instance_id
|
||||||
|
self.protocol = protocol
|
||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
self.datasource_name = datasource_name
|
self.namespace = namespace
|
||||||
self.embedding_index_name = embedding_index_name
|
self.table_name = table_name
|
||||||
|
self.opt_table_name = "_".join([self.instance_id, self.table_name])
|
||||||
self.field_name_mapping = field_name_mapping
|
self.field_name_mapping = field_name_mapping
|
||||||
|
self.embedding_field_separator = embedding_field_separator
|
||||||
|
if output_fields is None:
|
||||||
|
self.output_fields = [
|
||||||
|
field.split(",")[0] for field in self.field_name_mapping.values()
|
||||||
|
]
|
||||||
|
self.inverse_field_name_mapping: Dict[str, str] = {}
|
||||||
|
for key, value in self.field_name_mapping.items():
|
||||||
|
self.inverse_field_name_mapping[value.split(",")[0]] = key
|
||||||
|
|
||||||
def __getitem__(self, item: str) -> Any:
|
def __getitem__(self, item: str) -> Any:
|
||||||
return getattr(self, item)
|
return getattr(self, item)
|
||||||
@ -99,12 +108,12 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
try:
|
||||||
from alibabacloud_ha3engine import client, models
|
from alibabacloud_ha3engine_vector import client, models
|
||||||
from alibabacloud_tea_util import models as util_models
|
from alibabacloud_tea_util import models as util_models
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"Could not import alibaba cloud opensearch python package. "
|
"Could not import alibaba cloud opensearch python package. "
|
||||||
"Please install it with `pip install alibabacloud-ha3engine`."
|
"Please install it with `pip install alibabacloud-ha3engine-vector`."
|
||||||
)
|
)
|
||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
@ -117,11 +126,11 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
ignore_ssl=False,
|
ignore_ssl=False,
|
||||||
max_idle_conns=50,
|
max_idle_conns=50,
|
||||||
)
|
)
|
||||||
self.ha3EngineClient = client.Client(
|
self.ha3_engine_client = client.Client(
|
||||||
models.Config(
|
models.Config(
|
||||||
endpoint=config.endpoint,
|
endpoint=config.endpoint,
|
||||||
instance_id=config.instance_id,
|
instance_id=config.instance_id,
|
||||||
protocol="http",
|
protocol=config.protocol,
|
||||||
access_user_name=config.username,
|
access_user_name=config.username,
|
||||||
access_pass_word=config.password,
|
access_pass_word=config.password,
|
||||||
)
|
)
|
||||||
@ -135,15 +144,24 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
|
"""Insert documents into the instance.
|
||||||
|
Args:
|
||||||
|
texts: The text segments to be inserted into the vector storage,
|
||||||
|
should not be empty.
|
||||||
|
metadatas: Metadata information.
|
||||||
|
Returns:
|
||||||
|
id_list: List of document IDs.
|
||||||
|
"""
|
||||||
|
|
||||||
def _upsert(push_doc_list: List[Dict]) -> List[str]:
|
def _upsert(push_doc_list: List[Dict]) -> List[str]:
|
||||||
if push_doc_list is None or len(push_doc_list) == 0:
|
if push_doc_list is None or len(push_doc_list) == 0:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
push_request = models.PushDocumentsRequestModel(
|
push_request = models.PushDocumentsRequest(
|
||||||
self.options_headers, push_doc_list
|
self.options_headers, push_doc_list
|
||||||
)
|
)
|
||||||
push_response = self.ha3EngineClient.push_documents(
|
push_response = self.ha3_engine_client.push_documents(
|
||||||
self.config.datasource_name, field_name_map["id"], push_request
|
self.config.opt_table_name, field_name_map["id"], push_request
|
||||||
)
|
)
|
||||||
json_response = json.loads(push_response.body)
|
json_response = json.loads(push_response.body)
|
||||||
if json_response["status"] == "OK":
|
if json_response["status"] == "OK":
|
||||||
@ -160,15 +178,15 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
from alibabacloud_ha3engine import models
|
from alibabacloud_ha3engine_vector import models
|
||||||
|
|
||||||
ids = [sha1(t.encode("utf-8")).hexdigest() for t in texts]
|
id_list = [sha1(t.encode("utf-8")).hexdigest() for t in texts]
|
||||||
embeddings = self.embedding.embed_documents(list(texts))
|
embeddings = self.embedding.embed_documents(list(texts))
|
||||||
metadatas = metadatas or [{} for _ in texts]
|
metadatas = metadatas or [{} for _ in texts]
|
||||||
field_name_map = self.config.field_name_mapping
|
field_name_map = self.config.field_name_mapping
|
||||||
add_doc_list = []
|
add_doc_list = []
|
||||||
text_list = list(texts)
|
text_list = list(texts)
|
||||||
for idx, doc_id in enumerate(ids):
|
for idx, doc_id in enumerate(id_list):
|
||||||
embedding = embeddings[idx] if idx < len(embeddings) else None
|
embedding = embeddings[idx] if idx < len(embeddings) else None
|
||||||
metadata = metadatas[idx] if idx < len(metadatas) else None
|
metadata = metadatas[idx] if idx < len(metadatas) else None
|
||||||
text = text_list[idx] if idx < len(text_list) else None
|
text = text_list[idx] if idx < len(text_list) else None
|
||||||
@ -179,7 +197,9 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
if embedding is not None:
|
if embedding is not None:
|
||||||
add_doc_fields.__setitem__(
|
add_doc_fields.__setitem__(
|
||||||
field_name_map["embedding"],
|
field_name_map["embedding"],
|
||||||
",".join(str(unit) for unit in embedding),
|
self.config.embedding_field_separator.join(
|
||||||
|
str(unit) for unit in embedding
|
||||||
|
),
|
||||||
)
|
)
|
||||||
if metadata is not None:
|
if metadata is not None:
|
||||||
for md_key, md_value in metadata.items():
|
for md_key, md_value in metadata.items():
|
||||||
@ -198,6 +218,14 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
search_filter: Optional[Dict[str, Any]] = None,
|
search_filter: Optional[Dict[str, Any]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
|
"""Perform similarity retrieval based on text.
|
||||||
|
Args:
|
||||||
|
query: Text to vectorize for retrieval; should not be empty.
|
||||||
|
k: top n.
|
||||||
|
search_filter: Additional filtering conditions.
|
||||||
|
Returns:
|
||||||
|
document_list: List of documents.
|
||||||
|
"""
|
||||||
embedding = self.embedding.embed_query(query)
|
embedding = self.embedding.embed_query(query)
|
||||||
return self.create_results(
|
return self.create_results(
|
||||||
self.inner_embedding_query(
|
self.inner_embedding_query(
|
||||||
@ -212,6 +240,14 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
search_filter: Optional[dict] = None,
|
search_filter: Optional[dict] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
|
"""Perform similarity retrieval based on text with scores.
|
||||||
|
Args:
|
||||||
|
query: Text to vectorize for retrieval; should not be empty.
|
||||||
|
k: top n.
|
||||||
|
search_filter: Additional filtering conditions.
|
||||||
|
Returns:
|
||||||
|
document_list: List of documents.
|
||||||
|
"""
|
||||||
embedding: List[float] = self.embedding.embed_query(query)
|
embedding: List[float] = self.embedding.embed_query(query)
|
||||||
return self.create_results_with_score(
|
return self.create_results_with_score(
|
||||||
self.inner_embedding_query(
|
self.inner_embedding_query(
|
||||||
@ -226,6 +262,14 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
search_filter: Optional[dict] = None,
|
search_filter: Optional[dict] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
|
"""Perform retrieval directly using vectors.
|
||||||
|
Args:
|
||||||
|
embedding: vectors.
|
||||||
|
k: top n.
|
||||||
|
search_filter: Additional filtering conditions.
|
||||||
|
Returns:
|
||||||
|
document_list: List of documents.
|
||||||
|
"""
|
||||||
return self.create_results(
|
return self.create_results(
|
||||||
self.inner_embedding_query(
|
self.inner_embedding_query(
|
||||||
embedding=embedding, search_filter=search_filter, k=k
|
embedding=embedding, search_filter=search_filter, k=k
|
||||||
@ -238,27 +282,16 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
search_filter: Optional[Dict[str, Any]] = None,
|
search_filter: Optional[Dict[str, Any]] = None,
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
def generate_embedding_query() -> str:
|
def generate_filter_query() -> str:
|
||||||
tmp_search_config_str = (
|
if search_filter is None:
|
||||||
f"config=start:0,hit:{k},format:json&&cluster=general&&kvpairs="
|
return ""
|
||||||
f"first_formula:proxima_score({self.config.embedding_index_name})&&sort=+RANK"
|
filter_clause = " AND ".join(
|
||||||
|
[
|
||||||
|
create_filter(md_key, md_value)
|
||||||
|
for md_key, md_value in search_filter.items()
|
||||||
|
]
|
||||||
)
|
)
|
||||||
tmp_query_str = (
|
return filter_clause
|
||||||
f"&&query={self.config.embedding_index_name}:"
|
|
||||||
+ "'"
|
|
||||||
+ ",".join(str(x) for x in embedding)
|
|
||||||
+ "'"
|
|
||||||
)
|
|
||||||
if search_filter is not None:
|
|
||||||
filter_clause = "&&filter=" + " AND ".join(
|
|
||||||
[
|
|
||||||
create_filter(md_key, md_value)
|
|
||||||
for md_key, md_value in search_filter.items()
|
|
||||||
]
|
|
||||||
)
|
|
||||||
tmp_query_str += filter_clause
|
|
||||||
|
|
||||||
return tmp_search_config_str + tmp_query_str
|
|
||||||
|
|
||||||
def create_filter(md_key: str, md_value: Any) -> str:
|
def create_filter(md_key: str, md_value: Any) -> str:
|
||||||
md_filter_expr = self.config.field_name_mapping[md_key]
|
md_filter_expr = self.config.field_name_mapping[md_key]
|
||||||
@ -277,22 +310,32 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
return f"{md_filter_key} {md_filter_operator} {md_value}"
|
return f"{md_filter_key} {md_filter_operator} {md_value}"
|
||||||
return f'{md_filter_key}{md_filter_operator}"{md_value}"'
|
return f'{md_filter_key}{md_filter_operator}"{md_value}"'
|
||||||
|
|
||||||
def search_data(single_query_str: str) -> Dict[str, Any]:
|
def search_data() -> Dict[str, Any]:
|
||||||
search_query = models.SearchQuery(query=single_query_str)
|
request = QueryRequest(
|
||||||
search_request = models.SearchRequestModel(
|
table_name=self.config.table_name,
|
||||||
self.options_headers, search_query
|
namespace=self.config.namespace,
|
||||||
|
vector=embedding,
|
||||||
|
include_vector=True,
|
||||||
|
output_fields=self.config.output_fields,
|
||||||
|
filter=generate_filter_query(),
|
||||||
|
top_k=k,
|
||||||
)
|
)
|
||||||
return json.loads(self.ha3EngineClient.search(search_request).body)
|
|
||||||
|
|
||||||
from alibabacloud_ha3engine import models
|
query_result = self.ha3_engine_client.query(request)
|
||||||
|
return json.loads(query_result.body)
|
||||||
|
|
||||||
|
from alibabacloud_ha3engine_vector.models import QueryRequest
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query_str = generate_embedding_query()
|
json_response = search_data()
|
||||||
json_response = search_data(query_str)
|
if (
|
||||||
if len(json_response["errors"]) != 0:
|
"errorCode" in json_response
|
||||||
|
and "errorMsg" in json_response
|
||||||
|
and len(json_response["errorMsg"]) > 0
|
||||||
|
):
|
||||||
logger.error(
|
logger.error(
|
||||||
f"query {self.config.endpoint} {self.config.instance_id} "
|
f"query {self.config.endpoint} {self.config.instance_id} "
|
||||||
f"errors:{json_response['errors']} failed."
|
f"failed:{json_response['errorMsg']}."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return json_response
|
return json_response
|
||||||
@ -305,22 +348,51 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def create_results(self, json_result: Dict[str, Any]) -> List[Document]:
|
def create_results(self, json_result: Dict[str, Any]) -> List[Document]:
|
||||||
items = json_result["result"]["items"]
|
"""Assemble documents."""
|
||||||
|
items = json_result["result"]
|
||||||
query_result_list: List[Document] = []
|
query_result_list: List[Document] = []
|
||||||
for item in items:
|
for item in items:
|
||||||
fields = item["fields"]
|
if (
|
||||||
query_result_list.append(
|
"fields" not in item
|
||||||
Document(
|
or self.config.field_name_mapping["document"] not in item["fields"]
|
||||||
page_content=fields[self.config.field_name_mapping["document"]],
|
):
|
||||||
metadata=create_metadata(fields),
|
query_result_list.append(Document())
|
||||||
|
else:
|
||||||
|
fields = item["fields"]
|
||||||
|
query_result_list.append(
|
||||||
|
Document(
|
||||||
|
page_content=fields[self.config.field_name_mapping["document"]],
|
||||||
|
metadata=self.create_inverse_metadata(fields),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
return query_result_list
|
return query_result_list
|
||||||
|
|
||||||
|
def create_inverse_metadata(self, fields: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Create metadata from fields.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
fields: The fields of the document. The fields must be a dict.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
metadata: The metadata of the document. The metadata must be a dict.
|
||||||
|
"""
|
||||||
|
metadata: Dict[str, Any] = {}
|
||||||
|
for key, value in fields.items():
|
||||||
|
if key == "id" or key == "document" or key == "embedding":
|
||||||
|
continue
|
||||||
|
metadata[self.config.inverse_field_name_mapping[key]] = value
|
||||||
|
return metadata
|
||||||
|
|
||||||
def create_results_with_score(
|
def create_results_with_score(
|
||||||
self, json_result: Dict[str, Any]
|
self, json_result: Dict[str, Any]
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
items = json_result["result"]["items"]
|
"""Parsing the returned results with scores.
|
||||||
|
Args:
|
||||||
|
json_result: Results from OpenSearch query.
|
||||||
|
Returns:
|
||||||
|
query_result_list: Results with scores.
|
||||||
|
"""
|
||||||
|
items = json_result["result"]
|
||||||
query_result_list: List[Tuple[Document, float]] = []
|
query_result_list: List[Tuple[Document, float]] = []
|
||||||
for item in items:
|
for item in items:
|
||||||
fields = item["fields"]
|
fields = item["fields"]
|
||||||
@ -328,13 +400,65 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=fields[self.config.field_name_mapping["document"]],
|
page_content=fields[self.config.field_name_mapping["document"]],
|
||||||
metadata=create_metadata(fields),
|
metadata=self.create_inverse_metadata(fields),
|
||||||
),
|
),
|
||||||
float(item["sortExprValues"][0]),
|
float(item["score"]),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return query_result_list
|
return query_result_list
|
||||||
|
|
||||||
|
def delete_documents_with_texts(self, texts: List[str]) -> bool:
|
||||||
|
"""Delete documents based on their page content.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: List of document page content.
|
||||||
|
Returns:
|
||||||
|
Whether the deletion was successful or not.
|
||||||
|
"""
|
||||||
|
id_list = [sha1(t.encode("utf-8")).hexdigest() for t in texts]
|
||||||
|
return self.delete_documents_with_document_id(id_list)
|
||||||
|
|
||||||
|
def delete_documents_with_document_id(self, id_list: List[str]) -> bool:
|
||||||
|
"""Delete documents based on their IDs.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
id_list: List of document IDs.
|
||||||
|
Returns:
|
||||||
|
Whether the deletion was successful or not.
|
||||||
|
"""
|
||||||
|
if id_list is None or len(id_list) == 0:
|
||||||
|
return True
|
||||||
|
|
||||||
|
from alibabacloud_ha3engine_vector import models
|
||||||
|
|
||||||
|
delete_doc_list = []
|
||||||
|
for doc_id in id_list:
|
||||||
|
delete_doc_list.append(
|
||||||
|
{
|
||||||
|
"fields": {self.config.field_name_mapping["id"]: doc_id},
|
||||||
|
"cmd": "delete",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
delete_request = models.PushDocumentsRequest(
|
||||||
|
self.options_headers, delete_doc_list
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
delete_response = self.ha3_engine_client.push_documents(
|
||||||
|
self.config.opt_table_name,
|
||||||
|
self.config.field_name_mapping["id"],
|
||||||
|
delete_request,
|
||||||
|
)
|
||||||
|
json_response = json.loads(delete_response.body)
|
||||||
|
return json_response["status"] == "OK"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"delete doc from :{self.config.endpoint} "
|
||||||
|
f"instance_id:{self.config.instance_id} failed.",
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_texts(
|
def from_texts(
|
||||||
cls,
|
cls,
|
||||||
@ -344,8 +468,25 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
config: Optional[AlibabaCloudOpenSearchSettings] = None,
|
config: Optional[AlibabaCloudOpenSearchSettings] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> "AlibabaCloudOpenSearch":
|
) -> "AlibabaCloudOpenSearch":
|
||||||
|
"""Create alibaba cloud opensearch vector store instance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: The text segments to be inserted into the vector storage,
|
||||||
|
should not be empty.
|
||||||
|
embedding: Embedding function.
|
||||||
|
config: Alibaba OpenSearch instance configuration.
|
||||||
|
metadatas: Metadata information.
|
||||||
|
Returns:
|
||||||
|
AlibabaCloudOpenSearch: Alibaba cloud opensearch vector store instance.
|
||||||
|
"""
|
||||||
|
if texts is None or len(texts) == 0:
|
||||||
|
raise Exception("the inserted text segments, should not be empty.")
|
||||||
|
|
||||||
|
if embedding is None:
|
||||||
|
raise Exception("the embeddings should not be empty.")
|
||||||
|
|
||||||
if config is None:
|
if config is None:
|
||||||
raise Exception("config can't be none")
|
raise Exception("config should not be none.")
|
||||||
|
|
||||||
ctx = cls(embedding, config, **kwargs)
|
ctx = cls(embedding, config, **kwargs)
|
||||||
ctx.add_texts(texts=texts, metadatas=metadatas)
|
ctx.add_texts(texts=texts, metadatas=metadatas)
|
||||||
@ -356,10 +497,27 @@ class AlibabaCloudOpenSearch(VectorStore):
|
|||||||
cls,
|
cls,
|
||||||
documents: List[Document],
|
documents: List[Document],
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
ids: Optional[List[str]] = None,
|
|
||||||
config: Optional[AlibabaCloudOpenSearchSettings] = None,
|
config: Optional[AlibabaCloudOpenSearchSettings] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> "AlibabaCloudOpenSearch":
|
) -> "AlibabaCloudOpenSearch":
|
||||||
|
"""Create alibaba cloud opensearch vector store instance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: Documents to be inserted into the vector storage,
|
||||||
|
should not be empty.
|
||||||
|
embedding: Embedding function.
|
||||||
|
config: Alibaba OpenSearch instance configuration.
|
||||||
|
ids: Specify the ID for the inserted document. If left empty, the ID will be
|
||||||
|
automatically generated based on the text content.
|
||||||
|
Returns:
|
||||||
|
AlibabaCloudOpenSearch: Alibaba cloud opensearch vector store instance.
|
||||||
|
"""
|
||||||
|
if documents is None or len(documents) == 0:
|
||||||
|
raise Exception("the inserted documents, should not be empty.")
|
||||||
|
|
||||||
|
if embedding is None:
|
||||||
|
raise Exception("the embeddings should not be empty.")
|
||||||
|
|
||||||
if config is None:
|
if config is None:
|
||||||
raise Exception("config can't be none")
|
raise Exception("config can't be none")
|
||||||
|
|
||||||
|
@ -1,11 +1,15 @@
|
|||||||
|
import time
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
from libs.langchain.tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
|
FakeEmbeddings,
|
||||||
|
)
|
||||||
|
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
from langchain.vectorstores.alibabacloud_opensearch import (
|
from langchain.vectorstores.alibabacloud_opensearch import (
|
||||||
AlibabaCloudOpenSearch,
|
AlibabaCloudOpenSearch,
|
||||||
AlibabaCloudOpenSearchSettings,
|
AlibabaCloudOpenSearchSettings,
|
||||||
)
|
)
|
||||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
|
||||||
|
|
||||||
OS_TOKEN_COUNT = 1536
|
OS_TOKEN_COUNT = 1536
|
||||||
|
|
||||||
@ -27,16 +31,22 @@ class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
|
|||||||
return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(texts.index(text))]
|
return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(texts.index(text))]
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
settings = AlibabaCloudOpenSearchSettings(
|
settings = AlibabaCloudOpenSearchSettings(
|
||||||
endpoint="The endpoint of opensearch instance, "
|
endpoint="The endpoint of opensearch instance, If you want to access through
|
||||||
"You can find it from the console of Alibaba Cloud OpenSearch.",
|
the public network, you need to enable public network access in the network
|
||||||
instance_id="The identify of opensearch instance, "
|
information of the instance details. If you want to access within
|
||||||
"You can find it from the console of Alibaba Cloud OpenSearch.",
|
the Alibaba Cloud VPC, you can directly use the API domain name.",
|
||||||
datasource_name="The name of the data source specified when creating it.",
|
instance_id="The identifier of the opensearch instance",
|
||||||
|
protocol (str): "Communication Protocol between SDK and Server, default is http.",
|
||||||
username="The username specified when purchasing the instance.",
|
username="The username specified when purchasing the instance.",
|
||||||
password="The password specified when purchasing the instance.",
|
password="The password specified when purchasing the instance.",
|
||||||
embedding_index_name="The name of the vector attribute "
|
namespace (str) : "The instance data will be partitioned based on the
|
||||||
"specified when configuring the instance attributes.",
|
namespace field, If the namespace is enabled, you need to specify the
|
||||||
|
namespace field name during initialization. Otherwise, the queries cannot
|
||||||
|
be executed correctly, default is empty.",
|
||||||
|
table_name="The table name is specified when adding a table after completing
|
||||||
|
the instance configuration.",
|
||||||
field_name_mapping={
|
field_name_mapping={
|
||||||
# insert data into opensearch based on the mapping name of the field.
|
# insert data into opensearch based on the mapping name of the field.
|
||||||
"id": "The id field name map of index document.",
|
"id": "The id field name map of index document.",
|
||||||
@ -50,59 +60,169 @@ settings = AlibabaCloudOpenSearchSettings(
|
|||||||
"used when executing metadata filter query",
|
"used when executing metadata filter query",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
settings = AlibabaCloudOpenSearchSettings(
|
||||||
|
endpoint="ha-cn-5yd3fhdm102.public.ha.aliyuncs.com",
|
||||||
|
instance_id="ha-cn-5yd3fhdm102",
|
||||||
|
username="instance user name",
|
||||||
|
password="instance password",
|
||||||
|
table_name="instance table name",
|
||||||
|
field_name_mapping={
|
||||||
|
# insert data into opensearch based on the mapping name of the field.
|
||||||
|
"id": "id",
|
||||||
|
"document": "document",
|
||||||
|
"embedding": "embedding",
|
||||||
|
"string_field": "string_filed,=",
|
||||||
|
"int_field": "int_filed,=",
|
||||||
|
"float_field": "float_field,=",
|
||||||
|
"double_field": "double_field,=",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
embeddings = FakeEmbeddingsWithOsDimension()
|
embeddings = FakeEmbeddingsWithOsDimension()
|
||||||
|
|
||||||
|
|
||||||
def test_create_alibabacloud_opensearch() -> None:
|
def test_create_alibabacloud_opensearch() -> None:
|
||||||
opensearch = create_alibabacloud_opensearch()
|
opensearch = create_alibabacloud_opensearch()
|
||||||
|
time.sleep(1)
|
||||||
output = opensearch.similarity_search("foo", k=10)
|
output = opensearch.similarity_search("foo", k=10)
|
||||||
assert len(output) == 3
|
assert len(output) == 3
|
||||||
|
|
||||||
|
|
||||||
def test_alibabacloud_opensearch_with_text_query() -> None:
|
def test_alibabacloud_opensearch_with_text_query() -> None:
|
||||||
opensearch = create_alibabacloud_opensearch()
|
opensearch = create_alibabacloud_opensearch()
|
||||||
output = opensearch.similarity_search("foo", k=1)
|
output = opensearch.similarity_search(query="foo", k=1)
|
||||||
assert output == [Document(page_content="foo", metadata={"metadata": "0"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="foo",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search("bar", k=1)
|
output = opensearch.similarity_search(query="bar", k=1)
|
||||||
assert output == [Document(page_content="bar", metadata={"metadata": "1"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="bar",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value2",
|
||||||
|
"int_field": 2,
|
||||||
|
"float_field": 3.0,
|
||||||
|
"double_field": 4.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search("baz", k=1)
|
output = opensearch.similarity_search(query="baz", k=1)
|
||||||
assert output == [Document(page_content="baz", metadata={"metadata": "2"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="baz",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value3",
|
||||||
|
"int_field": 3,
|
||||||
|
"float_field": 5.0,
|
||||||
|
"double_field": 6.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_alibabacloud_opensearch_with_vector_query() -> None:
|
def test_alibabacloud_opensearch_with_vector_query() -> None:
|
||||||
opensearch = create_alibabacloud_opensearch()
|
opensearch = create_alibabacloud_opensearch()
|
||||||
output = opensearch.similarity_search_by_vector(embeddings.embed_query("foo"), k=1)
|
output = opensearch.similarity_search_by_vector(embeddings.embed_query("foo"), k=1)
|
||||||
assert output == [Document(page_content="foo", metadata={"metadata": "0"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="foo",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search_by_vector(embeddings.embed_query("bar"), k=1)
|
output = opensearch.similarity_search_by_vector(embeddings.embed_query("bar"), k=1)
|
||||||
assert output == [Document(page_content="bar", metadata={"metadata": "1"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="bar",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value2",
|
||||||
|
"int_field": 2,
|
||||||
|
"float_field": 3.0,
|
||||||
|
"double_field": 4.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search_by_vector(embeddings.embed_query("baz"), k=1)
|
output = opensearch.similarity_search_by_vector(embeddings.embed_query("baz"), k=1)
|
||||||
assert output == [Document(page_content="baz", metadata={"metadata": "2"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="baz",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value3",
|
||||||
|
"int_field": 3,
|
||||||
|
"float_field": 5.0,
|
||||||
|
"double_field": 6.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_alibabacloud_opensearch_with_text_and_meta_query() -> None:
|
def test_alibabacloud_opensearch_with_text_and_meta_query() -> None:
|
||||||
opensearch = create_alibabacloud_opensearch()
|
opensearch = create_alibabacloud_opensearch()
|
||||||
output = opensearch.similarity_search(
|
output = opensearch.similarity_search(
|
||||||
query="foo", search_filter={"metadata": "0"}, k=1
|
query="foo", search_filter={"string_field": "value1"}, k=1
|
||||||
)
|
)
|
||||||
assert output == [Document(page_content="foo", metadata={"metadata": "0"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="foo",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search(
|
output = opensearch.similarity_search(
|
||||||
query="bar", search_filter={"metadata": "1"}, k=1
|
query="bar", search_filter={"int_field": 2}, k=1
|
||||||
)
|
)
|
||||||
assert output == [Document(page_content="bar", metadata={"metadata": "1"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="bar",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value2",
|
||||||
|
"int_field": 2,
|
||||||
|
"float_field": 3.0,
|
||||||
|
"double_field": 4.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search(
|
output = opensearch.similarity_search(
|
||||||
query="baz", search_filter={"metadata": "2"}, k=1
|
query="baz", search_filter={"float_field": 5.0}, k=1
|
||||||
)
|
)
|
||||||
assert output == [Document(page_content="baz", metadata={"metadata": "2"})]
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="baz",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value3",
|
||||||
|
"int_field": 3,
|
||||||
|
"float_field": 5.0,
|
||||||
|
"double_field": 6.0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
output = opensearch.similarity_search(
|
output = opensearch.similarity_search(
|
||||||
query="baz", search_filter={"metadata": "3"}, k=1
|
query="baz", search_filter={"float_field": 6.0}, k=1
|
||||||
)
|
)
|
||||||
assert len(output) == 0
|
assert len(output) == 0
|
||||||
|
|
||||||
@ -110,15 +230,63 @@ def test_alibabacloud_opensearch_with_text_and_meta_query() -> None:
|
|||||||
def test_alibabacloud_opensearch_with_text_and_meta_score_query() -> None:
|
def test_alibabacloud_opensearch_with_text_and_meta_score_query() -> None:
|
||||||
opensearch = create_alibabacloud_opensearch()
|
opensearch = create_alibabacloud_opensearch()
|
||||||
output = opensearch.similarity_search_with_relevance_scores(
|
output = opensearch.similarity_search_with_relevance_scores(
|
||||||
query="foo", search_filter={"metadata": "0"}, k=1
|
query="foo",
|
||||||
|
search_filter={
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
k=1,
|
||||||
)
|
)
|
||||||
assert output == [
|
assert output == [
|
||||||
(Document(page_content="foo", metadata={"metadata": "0"}), 10000.0)
|
(
|
||||||
|
Document(
|
||||||
|
page_content="foo",
|
||||||
|
metadata={
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
0.0,
|
||||||
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_alibabacloud_opensearch_delete_doc() -> None:
    """Deleting a document by its text removes it from later searches."""
    store = create_alibabacloud_opensearch()

    deleted = store.delete_documents_with_texts(["bar"])
    assert deleted

    # Pause before querying, as the other tests in this file do after writes.
    time.sleep(1)

    remaining = store.similarity_search(
        query="bar",
        search_filter={"int_field": 2},
        k=1,
    )
    assert len(remaining) == 0
|
||||||
|
|
||||||
|
|
||||||
def create_alibabacloud_opensearch() -> AlibabaCloudOpenSearch:
|
def create_alibabacloud_opensearch() -> AlibabaCloudOpenSearch:
|
||||||
metadatas = [{"metadata": str(i)} for i in range(len(texts))]
|
metadatas = [
|
||||||
|
{
|
||||||
|
"string_field": "value1",
|
||||||
|
"int_field": 1,
|
||||||
|
"float_field": 1.0,
|
||||||
|
"double_field": 2.0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"string_field": "value2",
|
||||||
|
"int_field": 2,
|
||||||
|
"float_field": 3.0,
|
||||||
|
"double_field": 4.0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"string_field": "value3",
|
||||||
|
"int_field": 3,
|
||||||
|
"float_field": 5.0,
|
||||||
|
"double_field": 6.0,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
return AlibabaCloudOpenSearch.from_texts(
|
return AlibabaCloudOpenSearch.from_texts(
|
||||||
texts=texts,
|
texts=texts,
|
||||||
|
Loading…
Reference in New Issue
Block a user