# langchain/tests/integration_tests/vectorstores/test_marqo.py

"""Test Marqo functionality."""
from typing import Dict
import marqo
import pytest
from langchain.docstore.document import Document
from langchain.vectorstores.marqo import Marqo
DEFAULT_MARQO_URL = "http://localhost:8882"
DEFAULT_MARQO_API_KEY = ""
INDEX_NAME = "langchain-integration-tests"
@pytest.fixture
def client() -> marqo.Client:
    """Provide a Marqo client whose test index has been reset.

    Any existing ``INDEX_NAME`` index is deleted (best effort) and a fresh
    one is created before the client is handed to the test.

    Returns:
        A ``marqo.Client`` connected to ``DEFAULT_MARQO_URL``. This is the
        raw Marqo client, not the LangChain ``Marqo`` vector store wrapper.
    """
    client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
    try:
        client.index(INDEX_NAME).delete()
    except Exception:
        # Deliberate best-effort cleanup: presumably the delete fails when the
        # index does not exist yet (e.g. first run), which is not an error.
        pass
    client.create_index(INDEX_NAME)
    return client
def test_marqo(client: Marqo) -> None:
    """Build a store from plain texts and check the top hit for "foo"."""
    # ``client`` is requested so the fixture resets the index before indexing.
    store = Marqo.from_texts(
        texts=["foo", "bar", "baz"],
        index_name=INDEX_NAME,
        url=DEFAULT_MARQO_URL,
        api_key=DEFAULT_MARQO_API_KEY,
        verbose=False,
    )
    top_hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert top_hits == expected
def test_marqo_with_metadatas(client: Marqo) -> None:
    """Build a store from texts plus metadata and check the top hit keeps it."""
    corpus = ["foo", "bar", "baz"]
    # One metadata dict per text, recording the text's position as its page.
    page_info = [{"page": page} for page, _ in enumerate(corpus)]
    store = Marqo.from_texts(
        texts=corpus,
        metadatas=page_info,
        index_name=INDEX_NAME,
        url=DEFAULT_MARQO_URL,
        api_key=DEFAULT_MARQO_API_KEY,
        verbose=False,
    )
    top_hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert top_hits == expected
def test_marqo_with_scores(client: Marqo) -> None:
    """Check scored search returns all documents in strictly descending score."""
    corpus = ["foo", "bar", "baz"]
    page_info = [{"page": page} for page, _ in enumerate(corpus)]
    store = Marqo.from_texts(
        texts=corpus,
        metadatas=page_info,
        index_name=INDEX_NAME,
        url=DEFAULT_MARQO_URL,
        api_key=DEFAULT_MARQO_API_KEY,
        verbose=False,
    )
    scored_hits = store.similarity_search_with_score("foo", k=3)

    # Split each result into its document (index 0) and its score (index 1).
    docs = []
    scores = []
    for hit in scored_hits:
        docs.append(hit[0])
        scores.append(hit[1])

    expected_docs = [
        Document(page_content=text, metadata={"page": page})
        for page, text in enumerate(corpus)
    ]
    assert docs == expected_docs
    assert scores[0] > scores[1] > scores[2]
def test_marqo_add_texts(client: Marqo) -> None:
    """Adding the same texts twice must yield six distinct document ids."""
    store = Marqo(client=client, index_name=INDEX_NAME)
    batch = ["1", "2", "3"]

    first_ids = store.add_texts(list(batch))
    assert len(first_ids) == 3

    second_ids = store.add_texts(list(batch))
    assert len(second_ids) == 3

    # No id may repeat, either within a batch or across the two batches.
    distinct_ids = {*first_ids, *second_ids}
    assert len(distinct_ids) == 6
def test_marqo_search(client: Marqo) -> None:
    """Check the raw Marqo response ranks the matching document first."""
    store = Marqo(client=client, index_name=INDEX_NAME)
    corpus = ["This is document 1", "2", "3"]
    added_ids = store.add_texts(corpus)
    response = store.marqo_similarity_search("What is the first document?", k=3)

    assert len(added_ids) == len(corpus)
    # The best hit's ``_id`` must be the id returned for the first text added.
    best_hit = response["hits"][0]
    assert added_ids[0] == best_hit["_id"]
def test_marqo_bulk(client: Marqo) -> None:
    """Check bulk search returns, per query, the matching document first."""
    store = Marqo(client=client, index_name=INDEX_NAME)
    corpus = ["This is document 1", "2", "3"]
    added_ids = store.add_texts(corpus)

    queries = ["What is the first document?", "2", "3"]
    per_query_hits = store.bulk_similarity_search(queries, k=3)

    assert len(added_ids) == len(corpus)
    # Query i is expected to retrieve document i as its top hit.
    for position, expected_text in enumerate(corpus):
        assert per_query_hits[position][0].page_content == expected_text
def test_marqo_weighted_query(client: Marqo) -> None:
    """Check a weighted query with a negative term picks "Smartphone"."""
    store = Marqo.from_texts(
        texts=["Smartphone", "Telephone"],
        index_name=INDEX_NAME,
        url=DEFAULT_MARQO_URL,
        api_key=DEFAULT_MARQO_API_KEY,
        verbose=False,
    )
    # Query terms mapped to weights; the second term carries a negative weight.
    weighted_query = {"communications device": 1.0, "Old technology": -5.0}
    top_hits = store.similarity_search(weighted_query, k=1)
    expected = [Document(page_content="Smartphone")]
    assert top_hits == expected
def test_marqo_multimodal() -> None:
    """Search an image index that was created directly through the Marqo client.

    The index is built with image support enabled and populated with two
    caption/image documents. The test then checks that a text query retrieves
    the expected image document, and that ``add_texts`` on this store raises
    ``ValueError``.
    """
    client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)

    # Reset the index for this example. A single best-effort delete is enough:
    # repeating the delete unguarded afterwards would target an index that is
    # already gone.
    try:
        client.index(INDEX_NAME).delete()
    except Exception:
        # Presumably the index did not exist; nothing to clean up.
        pass

    # This index could have been created by another system
    settings = {"treat_urls_and_pointers_as_images": True, "model": "ViT-L/14"}
    client.create_index(INDEX_NAME, **settings)
    client.index(INDEX_NAME).add_documents(
        [
            # image of a bus
            {
                "caption": "Bus",
                "image": "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/"
                "examples/ImageSearchGuide/data/image4.jpg",
            },
            # image of a plane
            {
                "caption": "Plane",
                "image": "https://raw.githubusercontent.com/marqo-ai/marqo/"
                "mainline/examples/ImageSearchGuide/data/image2.jpg",
            },
        ],
    )

    def get_content(res: Dict[str, str]) -> str:
        """Render a Marqo hit as page content: its text, else "caption: image"."""
        if "text" in res:
            return res["text"]
        return f"{res['caption']}: {res['image']}"

    marqo_search = Marqo(client, INDEX_NAME, page_content_builder=get_content)

    query = "vehicles that fly"
    docs = marqo_search.similarity_search(query)
    # Page content is "caption: image-url", so the caption precedes the first ":".
    assert docs[0].page_content.split(":")[0] == "Plane"

    # Adding plain texts through this store is expected to be rejected.
    with pytest.raises(ValueError):
        marqo_search.add_texts(["text"])