forked from Archives/langchain
OpenSearch: Add Support for Boolean Filter with ANN search (#3038)
### Description
Add support for a Boolean filter with approximate nearest neighbor (ANN) search.
Documentation: https://opensearch.org/docs/latest/search-plugins/knn/filter-search-knn/#boolean-filter-with-ann-search

### Issues Resolved
https://github.com/hwchase17/langchain/issues/2924

Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
This commit is contained in:
parent
5420a0e404
commit
3453b7457c
@ -146,6 +146,28 @@ def _default_approximate_search_query(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _approximate_search_query_with_boolean_filter(
|
||||||
|
query_vector: List[float],
|
||||||
|
boolean_filter: Dict,
|
||||||
|
size: int = 4,
|
||||||
|
k: int = 4,
|
||||||
|
vector_field: str = "vector_field",
|
||||||
|
subquery_clause: str = "must",
|
||||||
|
) -> Dict:
|
||||||
|
"""For Approximate k-NN Search, with Boolean Filter."""
|
||||||
|
return {
|
||||||
|
"size": size,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": boolean_filter,
|
||||||
|
subquery_clause: [
|
||||||
|
{"knn": {vector_field: {"vector": query_vector, "k": k}}}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _default_script_query(
|
def _default_script_query(
|
||||||
query_vector: List[float],
|
query_vector: List[float],
|
||||||
space_type: str = "l2",
|
space_type: str = "l2",
|
||||||
@ -317,6 +339,11 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
|
|
||||||
size: number of results the query actually returns; default: 4
|
size: number of results the query actually returns; default: 4
|
||||||
|
|
||||||
|
boolean_filter: A Boolean filter consists of a Boolean query that
|
||||||
|
contains a k-NN query and a filter
|
||||||
|
|
||||||
|
subquery_clause: Query clause on the knn vector field; default: "must"
|
||||||
|
|
||||||
Optional Args for Script Scoring Search:
|
Optional Args for Script Scoring Search:
|
||||||
search_type: "script_scoring"; default: "approximate_search"
|
search_type: "script_scoring"; default: "approximate_search"
|
||||||
|
|
||||||
@ -339,11 +366,19 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
text_field = _get_kwargs_value(kwargs, "text_field", "text")
|
text_field = _get_kwargs_value(kwargs, "text_field", "text")
|
||||||
metadata_field = _get_kwargs_value(kwargs, "metadata_field", "metadata")
|
metadata_field = _get_kwargs_value(kwargs, "metadata_field", "metadata")
|
||||||
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
||||||
|
|
||||||
if search_type == "approximate_search":
|
if search_type == "approximate_search":
|
||||||
size = _get_kwargs_value(kwargs, "size", 4)
|
size = _get_kwargs_value(kwargs, "size", 4)
|
||||||
search_query = _default_approximate_search_query(
|
boolean_filter = _get_kwargs_value(kwargs, "boolean_filter", {})
|
||||||
embedding, size, k, vector_field
|
subquery_clause = _get_kwargs_value(kwargs, "subquery_clause", "must")
|
||||||
)
|
if boolean_filter != {}:
|
||||||
|
search_query = _approximate_search_query_with_boolean_filter(
|
||||||
|
embedding, boolean_filter, size, k, vector_field, subquery_clause
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
search_query = _default_approximate_search_query(
|
||||||
|
embedding, size, k, vector_field
|
||||||
|
)
|
||||||
elif search_type == SCRIPT_SCORING_SEARCH:
|
elif search_type == SCRIPT_SCORING_SEARCH:
|
||||||
space_type = _get_kwargs_value(kwargs, "space_type", "l2")
|
space_type = _get_kwargs_value(kwargs, "space_type", "l2")
|
||||||
pre_filter = _get_kwargs_value(kwargs, "pre_filter", MATCH_ALL_QUERY)
|
pre_filter = _get_kwargs_value(kwargs, "pre_filter", MATCH_ALL_QUERY)
|
||||||
|
@ -150,3 +150,17 @@ def test_opensearch_embedding_size_zero() -> None:
|
|||||||
OpenSearchVectorSearch.from_texts(
|
OpenSearchVectorSearch.from_texts(
|
||||||
[], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
|
[], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_appx_search_with_boolean_filter() -> None:
    """Approximate search combined with a Boolean filter passed via kwargs."""
    # Filter clause: a term query on the "text" field for the value "bar".
    term_clause = {"term": {"text": "bar"}}
    only_bar = {"bool": {"must": [term_clause]}}

    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    hits = store.similarity_search(
        "foo",
        k=3,
        boolean_filter=only_bar,
        subquery_clause="should",
    )

    # Although k=3 is requested, only the "bar" document is expected back.
    expected = [Document(page_content="bar")]
    assert hits == expected
|
||||||
|
Loading…
Reference in New Issue
Block a user