"""Test ElasticSearch functionality."""
import logging
import os
import uuid
from typing import Generator, List, Union

import pytest
from elasticsearch import Elasticsearch

from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

logging.basicConfig(level=logging.DEBUG)

"""
To run these integration tests, first start a local Elasticsearch instance:

cd tests/integration_tests/vectorstores/docker-compose
docker-compose -f elasticsearch.yml up
"""
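
# NOTE: The OpenAI-backed tests below use `documents` and `embedding_openai`
# pytest fixtures that are not defined in this module; they are assumed to be
# provided elsewhere in the integration-test suite (e.g. a shared conftest.py).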
class TestElasticsearch:
    @classmethod
    def setup_class(cls) -> None:
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY environment variable is not set")

    @pytest.fixture(scope="class", autouse=True)
    def elasticsearch_url(self) -> Union[str, Generator[str, None, None]]:
        """Return the elasticsearch url."""
        url = "http://localhost:9200"
        yield url
        es = Elasticsearch(hosts=url)

        # Clear all indexes
        index_names = es.indices.get(index="_all").keys()
        for index_name in index_names:
            # print(index_name)
            es.indices.delete(index=index_name)

    def test_similarity_search_without_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search without metadata."""
        texts = ["foo", "bar", "baz"]
        docsearch = ElasticVectorSearch.from_texts(
            texts, FakeEmbeddings(), elasticsearch_url=elasticsearch_url
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

    def test_similarity_search_with_ssl_verify(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search with ssl verify."""
        # Placeholder connection settings; replace with real credentials and a
        # CA certificate path when running against a TLS-enabled cluster.
        ssl_verify = {
            "verify_certs": True,
            "basic_auth": ("ES_USER", "ES_PASSWORD"),
            "ca_certs": "ES_CA_CERTS_PATH",
        }
        texts = ["foo", "bar", "baz"]
        docsearch = ElasticVectorSearch.from_texts(
            texts,
            FakeEmbeddings(),
            elasticsearch_url=elasticsearch_url,
            ssl_verify=ssl_verify,
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

    def test_similarity_search_with_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search with metadata."""
        texts = ["foo", "bar", "baz"]
        metadatas = [{"page": i} for i in range(len(texts))]
        docsearch = ElasticVectorSearch.from_texts(
            texts,
            FakeEmbeddings(),
            metadatas=metadatas,
            elasticsearch_url=elasticsearch_url,
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo", metadata={"page": 0})]

    @pytest.mark.vcr(ignore_localhost=True)
    def test_default_index_from_documents(
        self,
        documents: List[Document],
        embedding_openai: OpenAIEmbeddings,
        elasticsearch_url: str,
    ) -> None:
        """This test checks the construction of a default
        ElasticSearch index using the 'from_documents' method."""

        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding_openai,
            elasticsearch_url=elasticsearch_url,
        )

        search_result = elastic_vector_search.similarity_search("sharks")

        print(search_result)
        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_from_documents(
        self,
        documents: List[Document],
        embedding_openai: OpenAIEmbeddings,
        elasticsearch_url: str,
    ) -> None:
        """This test checks the construction of a custom
        ElasticSearch index using the 'from_documents' method."""

        index_name = f"custom_index_{uuid.uuid4().hex}"
        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding_openai,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)

        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_add_documents(
        self,
        documents: List[Document],
        embedding_openai: OpenAIEmbeddings,
        elasticsearch_url: str,
    ) -> None:
        """This test checks the construction of a custom
        ElasticSearch index using the 'add_documents' method."""

        index_name = f"custom_index_{uuid.uuid4().hex}"
        elastic_vector_search = ElasticVectorSearch(
            embedding=embedding_openai,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        elastic_vector_search.add_documents(documents)

        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)

        assert len(search_result) != 0

    def test_custom_index_add_documents_to_exists_store(self) -> None:
        # TODO: implement it
        pass