You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/tests/integration_tests/vectorstores/test_opensearch.py

216 lines
7.9 KiB
Python

"""Test OpenSearch functionality."""
import pytest
from langchain.docstore.document import Document
from langchain.vectorstores.opensearch_vector_search import (
PAINLESS_SCRIPTING_SEARCH,
SCRIPT_SCORING_SEARCH,
OpenSearchVectorSearch,
)
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
DEFAULT_OPENSEARCH_URL = "http://localhost:9200"
texts = ["foo", "bar", "baz"]
def test_opensearch() -> None:
"""Test end to end indexing and search using Approximate Search."""
docsearch = OpenSearchVectorSearch.from_texts(
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_similarity_search_with_score() -> None:
"""Test similarity search with score using Approximate Search."""
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
metadatas=metadatas,
opensearch_url=DEFAULT_OPENSEARCH_URL,
)
output = docsearch.similarity_search_with_score("foo", k=2)
assert output == [
(Document(page_content="foo", metadata={"page": 0}), 1.0),
(Document(page_content="bar", metadata={"page": 1}), 0.5),
]
def test_opensearch_with_custom_field_name() -> None:
"""Test indexing and search using custom vector field and text field name."""
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
vector_field="my_vector",
text_field="custom_text",
)
output = docsearch.similarity_search(
"foo", k=1, vector_field="my_vector", text_field="custom_text"
)
assert output == [Document(page_content="foo")]
text_input = ["test", "add", "text", "method"]
OpenSearchVectorSearch.add_texts(
docsearch, text_input, vector_field="my_vector", text_field="custom_text"
)
output = docsearch.similarity_search(
"add", k=1, vector_field="my_vector", text_field="custom_text"
)
assert output == [Document(page_content="foo")]
def test_opensearch_with_metadatas() -> None:
"""Test end to end indexing and search with metadata."""
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
metadatas=metadatas,
opensearch_url=DEFAULT_OPENSEARCH_URL,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": 0})]
def test_add_text() -> None:
"""Test adding additional text elements to existing index."""
text_input = ["test", "add", "text", "method"]
metadatas = [{"page": i} for i in range(len(text_input))]
docsearch = OpenSearchVectorSearch.from_texts(
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
docids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas)
assert len(docids) == len(text_input)
def test_opensearch_script_scoring() -> None:
"""Test end to end indexing and search using Script Scoring Search."""
pre_filter_val = {"bool": {"filter": {"term": {"text": "bar"}}}}
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
is_appx_search=False,
)
output = docsearch.similarity_search(
"foo", k=1, search_type=SCRIPT_SCORING_SEARCH, pre_filter=pre_filter_val
)
assert output == [Document(page_content="bar")]
def test_add_text_script_scoring() -> None:
"""Test adding additional text elements and validating using Script Scoring."""
text_input = ["test", "add", "text", "method"]
metadatas = [{"page": i} for i in range(len(text_input))]
docsearch = OpenSearchVectorSearch.from_texts(
text_input,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
is_appx_search=False,
)
OpenSearchVectorSearch.add_texts(docsearch, texts, metadatas)
output = docsearch.similarity_search(
"add", k=1, search_type=SCRIPT_SCORING_SEARCH, space_type="innerproduct"
)
assert output == [Document(page_content="test")]
def test_opensearch_painless_scripting() -> None:
"""Test end to end indexing and search using Painless Scripting Search."""
pre_filter_val = {"bool": {"filter": {"term": {"text": "baz"}}}}
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
is_appx_search=False,
)
output = docsearch.similarity_search(
"foo", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, pre_filter=pre_filter_val
)
assert output == [Document(page_content="baz")]
def test_add_text_painless_scripting() -> None:
"""Test adding additional text elements and validating using Painless Scripting."""
text_input = ["test", "add", "text", "method"]
metadatas = [{"page": i} for i in range(len(text_input))]
docsearch = OpenSearchVectorSearch.from_texts(
text_input,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
is_appx_search=False,
)
OpenSearchVectorSearch.add_texts(docsearch, texts, metadatas)
output = docsearch.similarity_search(
"add", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, space_type="cosineSimilarity"
)
assert output == [Document(page_content="test")]
def test_opensearch_invalid_search_type() -> None:
"""Test to validate similarity_search by providing invalid search_type."""
docsearch = OpenSearchVectorSearch.from_texts(
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
with pytest.raises(ValueError):
docsearch.similarity_search("foo", k=1, search_type="invalid_search_type")
def test_opensearch_embedding_size_zero() -> None:
"""Test to validate indexing when embedding size is zero."""
with pytest.raises(RuntimeError):
OpenSearchVectorSearch.from_texts(
[], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
def test_appx_search_with_boolean_filter() -> None:
"""Test Approximate Search with Boolean Filter."""
boolean_filter_val = {"bool": {"must": [{"term": {"text": "bar"}}]}}
docsearch = OpenSearchVectorSearch.from_texts(
texts,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
)
output = docsearch.similarity_search(
"foo", k=3, boolean_filter=boolean_filter_val, subquery_clause="should"
)
assert output == [Document(page_content="bar")]
def test_appx_search_with_lucene_filter() -> None:
"""Test Approximate Search with Lucene Filter."""
lucene_filter_val = {"bool": {"must": [{"term": {"text": "bar"}}]}}
docsearch = OpenSearchVectorSearch.from_texts(
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL, engine="lucene"
)
output = docsearch.similarity_search("foo", k=3, lucene_filter=lucene_filter_val)
assert output == [Document(page_content="bar")]
def test_opensearch_with_custom_field_name_appx_true() -> None:
"""Test Approximate Search with custom field name appx true."""
text_input = ["add", "test", "text", "method"]
docsearch = OpenSearchVectorSearch.from_texts(
text_input,
FakeEmbeddings(),
opensearch_url=DEFAULT_OPENSEARCH_URL,
is_appx_search=True,
)
output = docsearch.similarity_search("add", k=1)
assert output == [Document(page_content="add")]
def test_opensearch_with_custom_field_name_appx_false() -> None:
"""Test Approximate Search with custom field name appx true."""
text_input = ["add", "test", "text", "method"]
docsearch = OpenSearchVectorSearch.from_texts(
text_input, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)
output = docsearch.similarity_search("add", k=1)
assert output == [Document(page_content="add")]