OpenSearch: Add Support for Boolean Filter with ANN search (#3038)

### Description
Add support for using a Boolean filter with approximate nearest neighbor (ANN) search.
Documentation:
https://opensearch.org/docs/latest/search-plugins/knn/filter-search-knn/#boolean-filter-with-ann-search

### Issues Resolved
https://github.com/hwchase17/langchain/issues/2924

Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
This commit is contained in:
Naveen Tatikonda 2023-04-17 22:26:26 -05:00 committed by GitHub
parent 5420a0e404
commit 3453b7457c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 3 deletions

View File

@ -146,6 +146,28 @@ def _default_approximate_search_query(
}
def _approximate_search_query_with_boolean_filter(
query_vector: List[float],
boolean_filter: Dict,
size: int = 4,
k: int = 4,
vector_field: str = "vector_field",
subquery_clause: str = "must",
) -> Dict:
"""For Approximate k-NN Search, with Boolean Filter."""
return {
"size": size,
"query": {
"bool": {
"filter": boolean_filter,
subquery_clause: [
{"knn": {vector_field: {"vector": query_vector, "k": k}}}
],
}
},
}
def _default_script_query(
query_vector: List[float],
space_type: str = "l2",
@ -317,6 +339,11 @@ class OpenSearchVectorSearch(VectorStore):
size: number of results the query actually returns; default: 4
boolean_filter: Boolean query used as the filter for the k-NN query; when
non-empty, it is combined with the k-NN subquery in a single bool query
subquery_clause: Query clause on the knn vector field; default: "must"
Optional Args for Script Scoring Search:
search_type: "script_scoring"; default: "approximate_search"
@ -339,8 +366,16 @@ class OpenSearchVectorSearch(VectorStore):
text_field = _get_kwargs_value(kwargs, "text_field", "text")
metadata_field = _get_kwargs_value(kwargs, "metadata_field", "metadata")
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
if search_type == "approximate_search":
size = _get_kwargs_value(kwargs, "size", 4)
boolean_filter = _get_kwargs_value(kwargs, "boolean_filter", {})
subquery_clause = _get_kwargs_value(kwargs, "subquery_clause", "must")
if boolean_filter != {}:
search_query = _approximate_search_query_with_boolean_filter(
embedding, boolean_filter, size, k, vector_field, subquery_clause
)
else:
search_query = _default_approximate_search_query(
embedding, size, k, vector_field
)

View File

@ -150,3 +150,17 @@ def test_opensearch_embedding_size_zero() -> None:
OpenSearchVectorSearch.from_texts(
[], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
def test_appx_search_with_boolean_filter() -> None:
    """Approximate search with a Boolean filter returns only the matching doc."""
    # Filter that keeps only documents whose "text" field matches the term "bar".
    term_filter = {"bool": {"must": [{"term": {"text": "bar"}}]}}
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    results = store.similarity_search(
        "foo",
        k=3,
        boolean_filter=term_filter,
        subquery_clause="should",
    )
    expected = [Document(page_content="bar")]
    assert results == expected