mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
community: added Document.id support to opensearch vectorstore (#27945)
Description: * Added support for Document.id to the OpenSearch vector store * Added test cases to match
This commit is contained in:
parent
75aa82fedc
commit
ea0ad917b0
@ -16,7 +16,7 @@ IMPORT_OPENSEARCH_PY_ERROR = (
|
||||
"Could not import OpenSearch. Please install it with `pip install opensearch-py`."
|
||||
)
|
||||
IMPORT_ASYNC_OPENSEARCH_PY_ERROR = """
|
||||
Could not import AsyncOpenSearch.
|
||||
Could not import AsyncOpenSearch.
|
||||
Please install it with `pip install opensearch-py`."""
|
||||
|
||||
SCRIPT_SCORING_SEARCH = "script_scoring"
|
||||
@ -902,6 +902,7 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
if metadata_field == "*" or metadata_field not in hit["_source"]
|
||||
else hit["_source"][metadata_field]
|
||||
),
|
||||
id=hit["_id"],
|
||||
),
|
||||
hit["_score"],
|
||||
)
|
||||
@ -1099,6 +1100,7 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
Document(
|
||||
page_content=results[i]["_source"][text_field],
|
||||
metadata=results[i]["_source"][metadata_field],
|
||||
id=results[i]["_id"],
|
||||
)
|
||||
for i in mmr_selected
|
||||
]
|
||||
|
@ -15,15 +15,19 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
|
||||
DEFAULT_OPENSEARCH_URL = "http://localhost:9200"
|
||||
texts = ["foo", "bar", "baz"]
|
||||
ids = ["id_foo", "id_bar", "id_baz"]
|
||||
|
||||
|
||||
def test_opensearch() -> None:
|
||||
"""Test end to end indexing and search using Approximate Search."""
|
||||
docsearch = OpenSearchVectorSearch.from_texts(
|
||||
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
|
||||
texts,
|
||||
FakeEmbeddings(),
|
||||
opensearch_url=DEFAULT_OPENSEARCH_URL,
|
||||
ids=ids,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(page_content="foo", id="id_foo")]
|
||||
|
||||
|
||||
def test_similarity_search_with_score() -> None:
|
||||
@ -34,11 +38,12 @@ def test_similarity_search_with_score() -> None:
|
||||
FakeEmbeddings(),
|
||||
metadatas=metadatas,
|
||||
opensearch_url=DEFAULT_OPENSEARCH_URL,
|
||||
ids=ids,
|
||||
)
|
||||
output = docsearch.similarity_search_with_score("foo", k=2)
|
||||
assert output == [
|
||||
(Document(page_content="foo", metadata={"page": 0}), 1.0),
|
||||
(Document(page_content="bar", metadata={"page": 1}), 0.5),
|
||||
(Document(page_content="foo", metadata={"page": 0}, id="id_foo"), 1.0),
|
||||
(Document(page_content="bar", metadata={"page": 1}, id="id_bar"), 0.5),
|
||||
]
|
||||
|
||||
|
||||
@ -50,20 +55,24 @@ def test_opensearch_with_custom_field_name() -> None:
|
||||
opensearch_url=DEFAULT_OPENSEARCH_URL,
|
||||
vector_field="my_vector",
|
||||
text_field="custom_text",
|
||||
ids=ids,
|
||||
)
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=1, vector_field="my_vector", text_field="custom_text"
|
||||
)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(page_content="foo", id="id_foo")]
|
||||
|
||||
text_input = ["test", "add", "text", "method"]
|
||||
OpenSearchVectorSearch.add_texts(
|
||||
docsearch, text_input, vector_field="my_vector", text_field="custom_text"
|
||||
docsearch,
|
||||
text_input,
|
||||
vector_field="my_vector",
|
||||
text_field="custom_text",
|
||||
)
|
||||
output = docsearch.similarity_search(
|
||||
"add", k=1, vector_field="my_vector", text_field="custom_text"
|
||||
)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(page_content="foo", id="id_foo")]
|
||||
|
||||
|
||||
def test_opensearch_with_metadatas() -> None:
|
||||
@ -74,9 +83,22 @@ def test_opensearch_with_metadatas() -> None:
|
||||
FakeEmbeddings(),
|
||||
metadatas=metadatas,
|
||||
opensearch_url=DEFAULT_OPENSEARCH_URL,
|
||||
ids=ids,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0}, id="id_foo")]
|
||||
|
||||
|
||||
def test_max_marginal_relevance_search() -> None:
|
||||
"""Test end to end indexing and mmr search."""
|
||||
docsearch = OpenSearchVectorSearch.from_texts(
|
||||
texts,
|
||||
FakeEmbeddings(),
|
||||
opensearch_url=DEFAULT_OPENSEARCH_URL,
|
||||
ids=ids,
|
||||
)
|
||||
output = docsearch.max_marginal_relevance_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", id="id_foo")]
|
||||
|
||||
|
||||
def test_add_text() -> None:
|
||||
@ -86,8 +108,8 @@ def test_add_text() -> None:
|
||||
docsearch = OpenSearchVectorSearch.from_texts(
|
||||
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
|
||||
)
|
||||
docids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas)
|
||||
assert len(docids) == len(text_input)
|
||||
doc_ids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas)
|
||||
assert len(doc_ids) == len(text_input)
|
||||
|
||||
|
||||
def test_add_embeddings() -> None:
|
||||
@ -112,7 +134,8 @@ def test_add_embeddings() -> None:
|
||||
)
|
||||
docsearch.add_embeddings(list(zip(text_input, embedding_vectors)), metadatas)
|
||||
output = docsearch.similarity_search("foo1", k=1)
|
||||
assert output == [Document(page_content="foo3", metadata={"page": 2})]
|
||||
assert output[0].page_content == "foo3"
|
||||
assert output[0].metadata == {"page": 2}
|
||||
|
||||
|
||||
def test_opensearch_script_scoring() -> None:
|
||||
@ -127,7 +150,8 @@ def test_opensearch_script_scoring() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=1, search_type=SCRIPT_SCORING_SEARCH, pre_filter=pre_filter_val
|
||||
)
|
||||
assert output == [Document(page_content="bar")]
|
||||
assert output[0].page_content == "bar"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_add_text_script_scoring() -> None:
|
||||
@ -144,7 +168,8 @@ def test_add_text_script_scoring() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"add", k=1, search_type=SCRIPT_SCORING_SEARCH, space_type="innerproduct"
|
||||
)
|
||||
assert output == [Document(page_content="test")]
|
||||
assert output[0].page_content == "test"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_opensearch_painless_scripting() -> None:
|
||||
@ -159,7 +184,8 @@ def test_opensearch_painless_scripting() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, pre_filter=pre_filter_val
|
||||
)
|
||||
assert output == [Document(page_content="baz")]
|
||||
assert output[0].page_content == "baz"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_add_text_painless_scripting() -> None:
|
||||
@ -176,7 +202,8 @@ def test_add_text_painless_scripting() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"add", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, space_type="cosineSimilarity"
|
||||
)
|
||||
assert output == [Document(page_content="test")]
|
||||
assert output[0].page_content == "test"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_opensearch_invalid_search_type() -> None:
|
||||
@ -207,7 +234,8 @@ def test_appx_search_with_boolean_filter() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=3, boolean_filter=boolean_filter_val, subquery_clause="should"
|
||||
)
|
||||
assert output == [Document(page_content="bar")]
|
||||
assert output[0].page_content == "bar"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_appx_search_with_lucene_filter() -> None:
|
||||
@ -217,7 +245,8 @@ def test_appx_search_with_lucene_filter() -> None:
|
||||
texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL, engine="lucene"
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=3, lucene_filter=lucene_filter_val)
|
||||
assert output == [Document(page_content="bar")]
|
||||
assert output[0].page_content == "bar"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_opensearch_with_custom_field_name_appx_true() -> None:
|
||||
@ -230,7 +259,8 @@ def test_opensearch_with_custom_field_name_appx_true() -> None:
|
||||
is_appx_search=True,
|
||||
)
|
||||
output = docsearch.similarity_search("add", k=1)
|
||||
assert output == [Document(page_content="add")]
|
||||
assert output[0].page_content == "add"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_opensearch_with_custom_field_name_appx_false() -> None:
|
||||
@ -240,7 +270,8 @@ def test_opensearch_with_custom_field_name_appx_false() -> None:
|
||||
text_input, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
|
||||
)
|
||||
output = docsearch.similarity_search("add", k=1)
|
||||
assert output == [Document(page_content="add")]
|
||||
assert output[0].page_content == "add"
|
||||
assert output[0].id is not None
|
||||
|
||||
|
||||
def test_opensearch_serverless_with_scripting_search_indexing_throws_error() -> None:
|
||||
@ -338,4 +369,5 @@ def test_appx_search_with_faiss_efficient_filter() -> None:
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=3, efficient_filter=efficient_filter_val
|
||||
)
|
||||
assert output == [Document(page_content="bar")]
|
||||
assert output[0].page_content == "bar"
|
||||
assert output[0].id is not None
|
||||
|
Loading…
Reference in New Issue
Block a user