mirror of https://github.com/hwchase17/langchain
Implement async API for Qdrant vector store (#7704)
Inspired by #5550, I implemented full async API support in Qdrant. The docs were extended to mention the existence of asynchronous operations in LangChain. I also took the opportunity to restructure the Qdrant tests and added a suite of tests for the async version. The async API requires the gRPC protocol to be enabled. Thus, it doesn't work in local mode yet, but we're considering adding that support for consistency.
pull/7951/head
parent
275b926cf7
commit
1ff5b67025
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,10 @@
|
||||
def qdrant_is_not_running() -> bool:
    """Check if Qdrant is not running.

    Sends a GET request to ``http://localhost:6333`` (10 second timeout) and
    inspects the ``title`` field of the JSON body.

    Returns:
        ``False`` only when the endpoint answers with a JSON object whose
        ``title`` equals ``"qdrant - vector search engine"``. ``True`` in every
        other case: connection failure, timeout, a body that is not JSON, or a
        JSON body that is not an object.
    """
    import requests

    try:
        response = requests.get("http://localhost:6333", timeout=10.0)
        response_json = response.json()
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        return True
    except ValueError:
        # Something answered on the port but the body is not JSON. JSON decode
        # errors raised by ``Response.json()`` are ``ValueError`` subclasses.
        return True

    if not isinstance(response_json, dict):
        # Valid JSON, but not an object, so there is no ``title`` to compare.
        return True
    return response_json.get("title") != "qdrant - vector search engine"
|
@ -0,0 +1,121 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
from .common import qdrant_is_not_running
|
||||
|
||||
# Skip every test in this module when no Qdrant server is detected on localhost.
pytestmark = pytest.mark.skipif(
    qdrant_is_not_running(),
    reason="Qdrant server is not running",
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
async def test_qdrant_aadd_texts_returns_all_ids(batch_size: int) -> None:
    """Check that Qdrant.aadd_texts returns one distinct id per added text."""
    store: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        batch_size=batch_size,
    )

    returned_ids = await store.aadd_texts(["foo", "bar", "baz"])

    # Three texts in, three ids out, and none of them repeated.
    assert len(returned_ids) == 3
    assert len(set(returned_ids)) == 3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_aadd_texts_stores_duplicated_texts(
    vector_name: Optional[str],
) -> None:
    """Check that Qdrant.aadd_texts keeps identical texts as separate points."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    qdrant = QdrantClient()
    collection_name = "test"

    base_params = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    # With a vector name the config is a {name: params} mapping; without one
    # the params object is passed as-is.
    vectors_config = (
        base_params if vector_name is None else {vector_name: base_params}
    )
    qdrant.recreate_collection(collection_name, vectors_config=vectors_config)

    store = Qdrant(
        qdrant,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    returned_ids = await store.aadd_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(returned_ids)) == 2
    assert qdrant.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
async def test_qdrant_aadd_texts_stores_ids(batch_size: int) -> None:
    """Check that Qdrant.aadd_texts stores the ids supplied by the caller."""
    from qdrant_client import QdrantClient

    expected_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    qdrant = QdrantClient()
    collection_name = "test"
    vector_params = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    qdrant.recreate_collection(collection_name, vectors_config=vector_params)

    store = Qdrant(qdrant, collection_name, ConsistentFakeEmbeddings())
    returned_ids = await store.aadd_texts(
        ["abc", "def"],
        ids=expected_ids,
        batch_size=batch_size,
    )

    # Returned ids are compared pairwise, in order, with the supplied ones.
    assert all(given == got for given, got in zip(expected_ids, returned_ids))
    assert qdrant.count(collection_name).count == 2
    points = qdrant.scroll(collection_name)[0]
    assert set(expected_ids) == {point.id for point in points}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", ["custom-vector"])
async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
    vector_name: str,
) -> None:
    """Check that Qdrant.aadd_texts stores embeddings under the vector name."""
    from qdrant_client import QdrantClient

    collection_name = "test"

    qdrant = QdrantClient()
    named_vectors = {
        vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    }
    qdrant.recreate_collection(collection_name, vectors_config=named_vectors)

    store = Qdrant(
        qdrant,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    await store.aadd_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert qdrant.count(collection_name).count == 5
    # Every stored point must expose its vector under the configured name.
    points = qdrant.scroll(collection_name, with_vectors=True)[0]
    for point in points:
        assert vector_name in point.vector  # type: ignore[operator]
|
@ -0,0 +1,247 @@
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from langchain.vectorstores.qdrant import QdrantException
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
from .common import qdrant_is_not_running
|
||||
|
||||
# Skip every test in this module when no Qdrant server is detected on localhost.
pytestmark = pytest.mark.skipif(
    qdrant_is_not_running(),
    reason="Qdrant server is not running",
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_qdrant_from_texts_stores_duplicated_texts() -> None:
    """Check that Qdrant.afrom_texts keeps identical texts as separate points."""
    from qdrant_client import QdrantClient

    # A random name gives each run its own collection.
    target_collection = uuid.uuid4().hex
    duplicated_texts = ["abc", "abc"]

    await Qdrant.afrom_texts(
        duplicated_texts,
        ConsistentFakeEmbeddings(),
        collection_name=target_collection,
    )

    qdrant = QdrantClient()
    assert qdrant.count(target_collection).count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that Qdrant.afrom_texts stores the ids supplied by the caller."""
    from qdrant_client import QdrantClient

    target_collection = uuid.uuid4().hex
    expected_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    await Qdrant.afrom_texts(
        ["abc", "def"],
        ConsistentFakeEmbeddings(),
        ids=expected_ids,
        collection_name=target_collection,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # Read the collection back through a separate client.
    qdrant = QdrantClient()
    assert qdrant.count(target_collection).count == 2
    points = qdrant.scroll(target_collection)[0]
    assert set(expected_ids) == {point.id for point in points}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", ["custom-vector"])
async def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
    vector_name: str,
) -> None:
    """Check that Qdrant.afrom_texts stores embeddings under the vector name."""
    from qdrant_client import QdrantClient

    target_collection = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(),
        collection_name=target_collection,
        vector_name=vector_name,
    )

    qdrant = QdrantClient()
    assert qdrant.count(target_collection).count == 5
    # Every stored point must expose its vector under the configured name.
    points = qdrant.scroll(target_collection, with_vectors=True)[0]
    for point in points:
        assert vector_name in point.vector  # type: ignore[operator]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_reuses_same_collection(
    vector_name: Optional[str],
) -> None:
    """Check that repeated Qdrant.afrom_texts calls add to one collection."""
    from qdrant_client import QdrantClient

    target_collection = uuid.uuid4().hex
    shared_embeddings = ConsistentFakeEmbeddings()

    # Two consecutive loads (5 texts, then 2) into the same collection.
    text_batches = (
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ["foo", "bar"],
    )
    for batch in text_batches:
        await Qdrant.afrom_texts(
            batch,
            shared_embeddings,
            collection_name=target_collection,
            vector_name=vector_name,
        )

    qdrant = QdrantClient()
    assert qdrant.count(target_collection).count == 7
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    vector_name: Optional[str],
) -> None:
    """Check that Qdrant.afrom_texts rejects a different vector size.

    The collection is created with 10-dimensional embeddings; loading
    5-dimensional embeddings into it must raise QdrantException.
    """
    target_collection = uuid.uuid4().hex
    initial_embeddings = ConsistentFakeEmbeddings(dimensionality=10)

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        initial_embeddings,
        collection_name=target_collection,
        vector_name=vector_name,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=target_collection,
            vector_name=vector_name,
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
async def test_qdrant_from_texts_raises_error_on_different_vector_name(
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Test if Qdrant.afrom_texts raises an exception if vector name does not match.

    Both calls use embeddings of the same dimensionality, so the vector name is
    the only setting that differs between them.
    """
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        vector_name=first_vector_name,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            # Same size as the first call: a size mismatch must not be what
            # triggers the exception in this test.
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            vector_name=second_vector_name,
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_qdrant_from_texts_raises_error_on_different_distance() -> None:
    """Test if Qdrant.afrom_texts raises an exception if distance does not match.

    Both calls use embeddings of the same dimensionality, so the distance
    function ("Cosine" vs "Euclid") is the only setting that differs.
    """
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        distance_func="Cosine",
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            # Same size as the first call: a size mismatch must not be what
            # triggers the exception in this test.
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            distance_func="Euclid",
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    vector_name: Optional[str],
) -> None:
    """Check that force_recreate lets Qdrant.afrom_texts replace a collection.

    The second load uses a different dimensionality (5 instead of 10) and
    passes ``force_recreate=True``; afterwards only its two texts are counted.
    """
    from qdrant_client import QdrantClient

    target_collection = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=target_collection,
        vector_name=vector_name,
    )
    await Qdrant.afrom_texts(
        ["foo", "bar"],
        ConsistentFakeEmbeddings(dimensionality=5),
        collection_name=target_collection,
        vector_name=vector_name,
        force_recreate=True,
    )

    qdrant = QdrantClient()
    assert qdrant.count(target_collection).count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
async def test_qdrant_from_texts_stores_metadatas(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Check that metadata given to Qdrant.afrom_texts comes back from a search."""
    texts = ["foo", "bar", "baz"]
    # Each text is tagged with its position in the list.
    metadatas = [{"page": position} for position, _ in enumerate(texts)]

    store = await Qdrant.afrom_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    found = await store.asimilarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert found == expected
|
@ -0,0 +1,46 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
from .common import qdrant_is_not_running
|
||||
|
||||
# Skip every test in this module when no Qdrant server is detected on localhost.
pytestmark = pytest.mark.skipif(
    qdrant_is_not_running(),
    reason="Qdrant server is not running",
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check end-to-end construction and async MMR search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": position} for position, _ in enumerate(texts)]

    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    found = await store.amax_marginal_relevance_search("foo", k=2, fetch_k=3)

    # Two of the three stored documents are expected, in this order.
    expected = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert found == expected
|
@ -0,0 +1,286 @@
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
from .common import qdrant_is_not_running
|
||||
|
||||
# Skip every test in this module when no Qdrant server is detected on localhost.
pytestmark = pytest.mark.skipif(
    qdrant_is_not_running(),
    reason="Qdrant server is not running",
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check that asimilarity_search returns the document matching the query."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    found = await store.asimilarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check that asimilarity_search_by_vector finds the document for a vector."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # The query vector comes from a fresh embeddings instance.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    found = await store.asimilarity_search_by_vector(query_vector, k=1)

    expected = [Document(page_content="foo")]
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check the (document, score) pair from the async scored vector search."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    results = await store.asimilarity_search_with_score_by_vector(query_vector, k=1)

    # k=1, so exactly one (document, score) pair is expected.
    assert len(results) == 1
    best_document, best_score = results[0]
    assert best_document == Document(page_content="foo")
    assert best_score >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that asimilarity_search applies a nested dict metadata filter."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # The query is "foo", but the filter values are those stored with "bar".
    metadata_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    found = await store.asimilarity_search("foo", k=1, filter=metadata_filter)

    expected = [
        Document(
            page_content="bar",
            metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
        )
    ]
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
) -> None:
    """Test async relevance-score search without a score threshold.

    With ``score_threshold=None`` and ``k=3`` all three stored documents are
    expected back, each with a score that rounds into the ``[0, 1]`` range.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
    )
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )
    assert len(output) == 3
    # Scores are rounded to two decimals before the range check.
    for _, score in output:
        assert 0 <= round(score, 2) <= 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
) -> None:
    """Check that a score threshold limits the async relevance-score search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
    )

    score_threshold = 0.98
    results = await store.asimilarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )

    # Only one of the three documents is expected to reach the threshold.
    assert len(results) == 1
    assert all(score >= score_threshold for _, score in results)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
) -> None:
    """Test async relevance-score search with both a threshold and a filter.

    Both the negative and the positive filter case go through the async
    ``asimilarity_search_with_relevance_scores`` method.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
    )
    score_threshold = 0.99  # for almost exact match

    # test negative filter condition: the filter values are those stored with
    # "bar", and no result is expected to reach the threshold for "foo"
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, **kwargs
    )
    assert len(output) == 0

    # test positive filter condition: the filter values are those stored with
    # "foo" itself
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, **kwargs
    )
    assert len(output) == 1
    assert all(score >= score_threshold for _, score in output)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
) -> None:
    """Check that asimilarity_search accepts a native Qdrant filter object."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
        for i, _ in enumerate(texts)
    ]
    store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
    )

    # Three conditions on dotted payload keys, all of which must hold.
    page_condition = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    details_page_condition = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    details_pages_condition = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    qdrant_filter = rest.Filter(
        must=[page_condition, details_page_condition, details_pages_condition]
    )

    found = await store.asimilarity_search("foo", k=1, filter=qdrant_filter)

    expected = [
        Document(
            page_content="bar",
            metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
        )
    ]
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
async def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test that async relevance scores fall within the ``[0, 1]`` range."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )
    output = await docsearch.asimilarity_search_with_relevance_scores("foo", k=3)

    # np.isclose tolerates a score that lands marginally above 1.
    assert all(
        (1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
    )
|
@ -0,0 +1,132 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_documents_extends_existing_collection(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that add_documents makes new documents searchable in the store."""
    store: Qdrant = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    extra_documents = [
        Document(page_content=content) for content in ["foobar", "foobaz"]
    ]
    store.add_documents(extra_documents, batch_size=batch_size)

    found = store.similarity_search("foobar", k=1)
    # ConsistentFakeEmbeddings return the same query embedding as the first
    # document embedding computed in `embedding.embed_documents`. Thus, "foo"
    # embedding is the same as "foobar" embedding
    assert found == [Document(page_content="foobar")]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_returns_all_ids(batch_size: int) -> None:
    """Check that Qdrant.add_texts returns one distinct id per added text."""
    store: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
    )

    returned_ids = store.add_texts(["foo", "bar", "baz"])

    # Three texts in, three ids out, and none of them repeated.
    assert len(returned_ids) == 3
    assert len(set(returned_ids)) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) -> None:
    """Check that Qdrant.add_texts keeps identical texts as separate points."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    qdrant = QdrantClient(":memory:")
    collection_name = "test"

    base_params = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    # With a vector name the config is a {name: params} mapping; without one
    # the params object is passed as-is.
    vectors_config = (
        base_params if vector_name is None else {vector_name: base_params}
    )
    qdrant.recreate_collection(collection_name, vectors_config=vectors_config)

    store = Qdrant(
        qdrant,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    returned_ids = store.add_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(returned_ids)) == 2
    assert qdrant.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
    """Check that Qdrant.add_texts stores the ids supplied by the caller."""
    from qdrant_client import QdrantClient

    expected_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    qdrant = QdrantClient(":memory:")
    collection_name = "test"
    vector_params = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    qdrant.recreate_collection(collection_name, vectors_config=vector_params)

    store = Qdrant(qdrant, collection_name, ConsistentFakeEmbeddings())
    returned_ids = store.add_texts(
        ["abc", "def"],
        ids=expected_ids,
        batch_size=batch_size,
    )

    # Returned ids are compared pairwise, in order, with the supplied ones.
    assert all(given == got for given, got in zip(expected_ids, returned_ids))
    assert qdrant.count(collection_name).count == 2
    points = qdrant.scroll(collection_name)[0]
    assert set(expected_ids) == {point.id for point in points}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that Qdrant.add_texts stores embeddings under the vector name."""
    from qdrant_client import QdrantClient

    collection_name = "test"

    qdrant = QdrantClient(":memory:")
    named_vectors = {
        vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    }
    qdrant.recreate_collection(collection_name, vectors_config=named_vectors)

    store = Qdrant(
        qdrant,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    store.add_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert qdrant.count(collection_name).count == 5
    # Every stored point must expose its vector under the configured name.
    points = qdrant.scroll(collection_name, with_vectors=True)[0]
    for point in points:
        assert vector_name in point.vector  # type: ignore[operator]
|
@ -0,0 +1 @@
|
||||
# TODO: implement tests for delete
|
@ -0,0 +1,59 @@
|
||||
from typing import Callable, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), None),
        (ConsistentFakeEmbeddings().embed_query, None),
        (None, ConsistentFakeEmbeddings().embed_query),
    ],
)
def test_qdrant_embedding_interface(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that Qdrant can be constructed from each supported embeddings form.

    Covered forms: an embeddings object, a callable passed as ``embeddings``,
    and a callable passed as ``embedding_function``.
    """
    from qdrant_client import QdrantClient

    # Construction alone is the assertion: it must not raise.
    Qdrant(
        QdrantClient(":memory:"),
        "test",
        embeddings=embeddings,
        embedding_function=embedding_function,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), ConsistentFakeEmbeddings().embed_query),
        (None, None),
    ],
)
def test_qdrant_embedding_interface_raises_value_error(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that the Qdrant constructor rejects ambiguous embeddings arguments.

    Passing both ``embeddings`` and ``embedding_function``, or neither of them,
    is expected to raise ``ValueError``.
    """
    from qdrant_client import QdrantClient

    in_memory_client = QdrantClient(":memory:")

    with pytest.raises(ValueError):
        Qdrant(
            in_memory_client,
            "test",
            embeddings=embeddings,
            embedding_function=embedding_function,
        )
|
@ -0,0 +1,252 @@
|
||||
import tempfile
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from langchain.vectorstores.qdrant import QdrantException
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
    """Check that Qdrant.from_texts keeps two identical texts as two points."""
    from qdrant_client import QdrantClient

    collection_name = "test"

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["abc", "abc"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
        )
        # Drop the wrapper before a second client opens the same path.
        del store

        reopened = QdrantClient(path=storage_path)
        assert 2 == reopened.count(collection_name).count
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that Qdrant.from_texts persists points under the supplied ids."""
    from qdrant_client import QdrantClient

    collection_name = "test"
    expected_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["abc", "def"],
            ConsistentFakeEmbeddings(),
            ids=expected_ids,
            collection_name=collection_name,
            path=storage_path,
            batch_size=batch_size,
            vector_name=vector_name,
        )
        # Drop the wrapper before a second client opens the same path.
        del store

        reopened = QdrantClient(path=storage_path)
        assert 2 == reopened.count(collection_name).count

        # Order is irrelevant, so the ids are compared as sets.
        points = reopened.scroll(collection_name)[0]
        assert set(expected_ids) == {point.id for point in points}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that Qdrant.from_texts writes every embedding under the given name."""
    from qdrant_client import QdrantClient

    collection_name = "test"

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Drop the wrapper before a second client opens the same path.
        del store

        reopened = QdrantClient(path=storage_path)
        assert 5 == reopened.count(collection_name).count

        points = reopened.scroll(collection_name, with_vectors=True)[0]
        for point in points:
            assert vector_name in point.vector  # type: ignore[operator]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_reuses_same_collection(vector_name: Optional[str]) -> None:
    """Check that a second Qdrant.from_texts call appends to the same collection.

    Five texts and then two more are written with identical settings; the
    collection is expected to end up with seven points.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"
    embeddings = ConsistentFakeEmbeddings()
    text_batches = (
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ["foo", "bar"],
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        for batch in text_batches:
            store = Qdrant.from_texts(
                batch,
                embeddings,
                collection_name=collection_name,
                path=storage_path,
                vector_name=vector_name,
            )
            # Drop the wrapper before the same path is opened again.
            del store

        reopened = QdrantClient(path=storage_path)
        assert 7 == reopened.count(collection_name).count
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    vector_name: Optional[str],
) -> None:
    """Check that Qdrant.from_texts rejects a changed embedding dimensionality.

    The collection is first created with 10-dimensional embeddings; writing to
    it again with 5-dimensional ones is expected to raise ``QdrantException``.
    """
    collection_name = "test"

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Drop the wrapper before the same path is opened again.
        del store

        smaller_embeddings = ConsistentFakeEmbeddings(dimensionality=5)
        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                smaller_embeddings,
                collection_name=collection_name,
                path=storage_path,
                vector_name=vector_name,
            )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
def test_qdrant_from_texts_raises_error_on_different_vector_name(
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Test if Qdrant.from_texts raises an exception if vector name does not match

    The collection is created with ``first_vector_name`` and then written to
    again with ``second_vector_name``; the second call is expected to raise
    ``QdrantException``.
    """
    collection_name = "test"
    # Both calls share one dimensionality so that the vector name is the only
    # setting that differs. With different dimensionalities the exception could
    # come from the size mismatch and the test would pass without ever
    # exercising the vector-name validation.
    dimensionality = 10

    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=dimensionality),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=first_vector_name,
        )
        # Drop the wrapper before the same path is opened again.
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=dimensionality),
                collection_name=collection_name,
                path=str(tmpdir),
                vector_name=second_vector_name,
            )
|
||||
|
||||
|
||||
def test_qdrant_from_texts_raises_error_on_different_distance() -> None:
    """Test if Qdrant.from_texts raises an exception if distance does not match

    The collection is created with the "Cosine" distance and then written to
    again with "Euclid"; the second call is expected to raise
    ``QdrantException``.
    """
    collection_name = "test"
    # Both calls share one dimensionality so that the distance function is the
    # only setting that differs. With different dimensionalities the exception
    # could come from the size mismatch and the distance validation would never
    # be what makes the test pass.
    dimensionality = 10

    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=dimensionality),
            collection_name=collection_name,
            path=str(tmpdir),
            distance_func="Cosine",
        )
        # Drop the wrapper before the same path is opened again.
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=dimensionality),
                collection_name=collection_name,
                path=str(tmpdir),
                distance_func="Euclid",
            )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    vector_name: Optional[str],
) -> None:
    """Check that ``force_recreate=True`` replaces a mismatching collection.

    Five texts are stored with 10-dimensional embeddings, then two texts with
    5-dimensional embeddings and ``force_recreate=True``; only the two points
    from the second call are expected to remain.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        original_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Drop the wrapper before the same path is opened again.
        del original_store

        recreated_store = Qdrant.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
            force_recreate=True,
        )
        del recreated_store

        reopened = QdrantClient(path=storage_path)
        assert 2 == reopened.count(collection_name).count
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_from_texts_stores_metadatas(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Check that metadata given to Qdrant.from_texts comes back from a search."""
    corpus = ["foo", "bar", "baz"]
    page_metadatas = [{"page": page_no} for page_no, _ in enumerate(corpus)]

    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected
|
@ -0,0 +1,40 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.skip(reason="Qdrant local behaves differently from Qdrant server")
def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Build an in-memory store and run a max marginal relevance (MMR) search."""
    corpus = ["foo", "bar", "baz"]
    page_metadatas = [{"page": page_no} for page_no, _ in enumerate(corpus)]

    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    hits = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)

    expected = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert hits == expected
|
@ -0,0 +1,275 @@
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Build an in-memory store from three texts and search it by query text."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Build an in-memory store and search it with a precomputed query vector."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # The query vector comes from a fresh embeddings instance, not the store's.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    hits = store.similarity_search_by_vector(query_vector, k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Search by vector and check the single (document, score) pair returned."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # The query vector comes from a fresh embeddings instance, not the store's.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    scored_hits = store.similarity_search_with_score_by_vector(query_vector, k=1)

    assert len(scored_hits) == 1
    found_document, found_score = scored_hits[0]
    assert found_document == Document(page_content="foo")
    assert found_score >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Search with a nested dict filter and expect only the matching document."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # These values are the ones generated above for the second text, "bar".
    dict_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    hits = store.similarity_search("foo", k=1, filter=dict_filter)

    expected_document = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert hits == [expected_document]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
) -> None:
    """Without a threshold, all three hits come back with scores in [0, 1]."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )

    assert len(scored_hits) == 3
    for scored_hit in scored_hits:
        # Scores are rounded to two decimals before the range check.
        rounded_score = round(scored_hit[1], 2)
        assert rounded_score >= 0
        assert rounded_score <= 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
) -> None:
    """With a 0.98 threshold, exactly one hit is returned and it meets it."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    score_threshold = 0.98
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )

    assert len(scored_hits) == 1
    for _, score in scored_hits:
        assert score >= score_threshold
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
) -> None:
    """Combine a score threshold with a dict filter in a relevance search.

    A filter selecting the "bar" metadata yields no hits above the threshold,
    while a filter selecting the "foo" metadata yields exactly one.
    """
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        vector_name=vector_name,
    )
    score_threshold = 0.99  # for almost exact match

    # Negative case: the filter matches the metadata generated for "bar".
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=negative_filter, score_threshold=score_threshold
    )
    assert len(scored_hits) == 0

    # Positive case: the filter matches the metadata generated for "foo".
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=positive_filter, score_threshold=score_threshold
    )
    assert len(scored_hits) == 1
    for _, score in scored_hits:
        assert score >= score_threshold
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
) -> None:
    """Search with a native ``rest.Filter`` and expect only the matching document."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    # Each condition addresses a payload field by its dotted path; the values
    # are the ones generated above for the second text, "bar".
    page_condition = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    details_page_condition = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    details_pages_condition = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    qdrant_filter = rest.Filter(
        must=[page_condition, details_page_condition, details_pages_condition]
    )

    hits = store.similarity_search("foo", k=1, filter=qdrant_filter)

    expected_document = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert hits == [expected_document]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check that every relevance score returned by a search lies in [0, 1]."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    scored_hits = store.similarity_search_with_relevance_scores("foo", k=3)

    for _, score in scored_hits:
        # np.isclose tolerates a score that lands marginally above 1.
        assert 1 >= score or np.isclose(score, 1)
        assert score >= 0
|
@ -1,685 +0,0 @@
|
||||
"""Test Qdrant functionality."""
|
||||
import tempfile
|
||||
from typing import Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.vectorstores import Qdrant
|
||||
from langchain.vectorstores.qdrant import QdrantException
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_similarity_search(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Build an in-memory store from three texts and search it by query text."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_similarity_search_by_vector(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Build an in-memory store and search it with a precomputed query vector."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    # The query vector comes from a fresh embeddings instance, not the store's.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    hits = store.similarity_search_by_vector(query_vector, k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Search by vector and check the single (document, score) pair returned."""
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    # The query vector comes from a fresh embeddings instance, not the store's.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    scored_hits = store.similarity_search_with_score_by_vector(query_vector, k=1)

    assert len(scored_hits) == 1
    found_document, found_score = scored_hits[0]
    assert found_document == Document(page_content="foo")
    assert found_score >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_documents(batch_size: int, vector_name: Optional[str]) -> None:
    """Add documents to an existing store and search for one of the new texts."""
    store: Qdrant = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    extra_documents = [
        Document(page_content=content) for content in ["foobar", "foobaz"]
    ]
    store.add_documents(extra_documents, batch_size=batch_size)

    hits = store.similarity_search("foobar", k=1)

    # Either "foobar" or "foo" is accepted as the top hit.
    # NOTE(review): the previous comment explained this by the fake embeddings
    # giving the query the same vector as the first embedded document, but it
    # named a different class than the one used here; confirm against
    # ConsistentFakeEmbeddings.
    accepted_results = (
        [Document(page_content="foobar")],
        [Document(page_content="foo")],
    )
    assert hits == accepted_results[0] or hits == accepted_results[1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_returns_all_ids(batch_size: int) -> None:
    """Check that Qdrant.add_texts returns one distinct id per added text."""
    store: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
    )

    returned_ids = store.add_texts(["foo", "bar", "baz"])

    assert 3 == len(returned_ids)
    # Deduplicating must not shrink the collection, i.e. all ids are unique.
    distinct_ids = set(returned_ids)
    assert 3 == len(distinct_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_with_metadatas(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Check that metadata given to Qdrant.from_texts comes back from a search."""
    corpus = ["foo", "bar", "baz"]
    page_metadatas = [{"page": page_no} for page_no, _ in enumerate(corpus)]

    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_similarity_search_filters(batch_size: int) -> None:
    """Search with a nested dict filter and expect only the matching document."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        batch_size=batch_size,
    )

    # These values are the ones generated above for the second text, "bar".
    dict_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    hits = store.similarity_search("foo", k=1, filter=dict_filter)

    expected_document = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert hits == [expected_document]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
) -> None:
    """Without a threshold, all three hits come back with scores in [0, 1]."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )

    assert len(scored_hits) == 3
    for scored_hit in scored_hits:
        # Scores are rounded to two decimals before the range check.
        rounded_score = round(scored_hit[1], 2)
        assert rounded_score >= 0
        assert rounded_score <= 1
|
||||
|
||||
|
||||
def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None:
    """With a 0.98 threshold, exactly one hit is returned and it meets it."""
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
    )

    score_threshold = 0.98
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )

    assert len(scored_hits) == 1
    for _, score in scored_hits:
        assert score >= score_threshold
|
||||
|
||||
|
||||
def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> (
    None
):
    """Combine a score threshold with a dict filter in a relevance search.

    A filter selecting the "bar" metadata yields no hits above the threshold,
    while a filter selecting the "foo" metadata yields exactly one.
    """
    corpus = ["foo", "bar", "baz"]
    nested_metadatas = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=nested_metadatas,
        location=":memory:",
    )
    score_threshold = 0.99  # for almost exact match

    # Negative case: the filter matches the metadata generated for "bar".
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=negative_filter, score_threshold=score_threshold
    )
    assert len(scored_hits) == 0

    # Positive case: the filter matches the metadata generated for "foo".
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=positive_filter, score_threshold=score_threshold
    )
    assert len(scored_hits) == 1
    for _, score in scored_hits:
        assert score >= score_threshold
|
||||
|
||||
|
||||
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Check that a native ``rest.Filter`` is honoured by similarity search.

    The filter requires three conditions on the stored payload (top-level
    page, nested page and nested pages list). Together they select the
    metadata generated for index 1, so the query "foo" must return "bar".
    """
    corpus = ["foo", "bar", "baz"]
    payloads = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=payloads,
        location=":memory:",
    )

    # Each condition addresses a payload field under the "metadata" key.
    top_level_page = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    nested_page = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    nested_pages = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    native_filter = rest.Filter(must=[top_level_page, nested_page, nested_pages])

    hits = store.similarity_search("foo", k=1, filter=native_filter)

    expected = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert hits == [expected]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Check max marginal relevance (MMR) search end to end.

    Runs for every combination of batch size, payload key names and vector
    name. The query "foo" with ``k=2`` and ``fetch_k=3`` must return the "foo"
    and "bar" documents, in that order.
    """
    corpus = ["foo", "bar", "baz"]
    payloads = [{"page": idx} for idx, _ in enumerate(corpus)]
    store_options = {
        "metadatas": payloads,
        "location": ":memory:",
        "content_payload_key": content_payload_key,
        "metadata_payload_key": metadata_payload_key,
        "batch_size": batch_size,
        "vector_name": vector_name,
    }
    store = Qdrant.from_texts(corpus, ConsistentFakeEmbeddings(), **store_options)

    hits = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)

    first_expected = Document(page_content="foo", metadata={"page": 0})
    second_expected = Document(page_content="bar", metadata={"page": 1})
    assert hits == [first_expected, second_expected]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), None),
        (ConsistentFakeEmbeddings().embed_query, None),
        (None, ConsistentFakeEmbeddings().embed_query),
    ],
)
def test_qdrant_embedding_interface(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that ``Qdrant`` can be built from each accepted embedding input.

    Each parametrized case supplies exactly one of ``embeddings`` and
    ``embedding_function``. The test passes if construction does not raise.
    """
    from qdrant_client import QdrantClient

    Qdrant(
        QdrantClient(":memory:"),
        "test",
        embeddings=embeddings,
        embedding_function=embedding_function,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), ConsistentFakeEmbeddings().embed_query),
        (None, None),
    ],
)
def test_qdrant_embedding_interface_raises_value_error(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that ``Qdrant`` rejects ambiguous or missing embedding inputs.

    Supplying both ``embeddings`` and ``embedding_function``, or neither of
    them, must raise ``ValueError``.
    """
    from qdrant_client import QdrantClient

    in_memory_client = QdrantClient(":memory:")
    embedding_options = {
        "embeddings": embeddings,
        "embedding_function": embedding_function,
    }

    with pytest.raises(ValueError):
        Qdrant(in_memory_client, "test", **embedding_options)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) -> None:
    """Check that ``Qdrant.add_texts`` keeps identical texts as separate points.

    The same text is added twice with different metadata. Two distinct ids
    must be returned and the collection must hold two points.
    """
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    client = QdrantClient(":memory:")
    collection_name = "test"
    unnamed_params = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    # A named vector is configured as a mapping; an unnamed one is passed as is.
    if vector_name is None:
        client.recreate_collection(collection_name, vectors_config=unnamed_params)
    else:
        client.recreate_collection(
            collection_name, vectors_config={vector_name: unnamed_params}
        )

    store = Qdrant(
        client,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    new_ids = store.add_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(new_ids)) == 2
    assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
    """Check that ``Qdrant.from_texts`` keeps identical texts as separate points.

    The same text is indexed twice into an on-disk collection inside a
    temporary directory. A fresh client on that path must count two points.
    """
    from qdrant_client import QdrantClient

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["abc", "abc"],
            ConsistentFakeEmbeddings(),
            collection_name="test",
            path=storage_path,
        )
        # Presumably releases the on-disk storage so a second client can open
        # the same path - confirm against qdrant-client local mode behaviour.
        del store

        reopened_client = QdrantClient(path=storage_path)
        assert reopened_client.count("test").count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Check that ``Qdrant.from_texts`` stores points under the provided ids.

    Two texts are indexed with explicit UUID strings into an on-disk
    collection. A fresh client on the same path must find exactly two points
    carrying those ids.
    """
    from qdrant_client import QdrantClient

    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        ids = [
            "fa38d572-4c31-4579-aedc-1960d79df6df",
            "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
        ]
        store = Qdrant.from_texts(
            ["abc", "def"],
            ConsistentFakeEmbeddings(),
            ids=ids,
            collection_name="test",
            path=storage_path,
            batch_size=batch_size,
            vector_name=vector_name,
        )
        # Presumably releases the on-disk storage so a second client can open
        # the same path - confirm against qdrant-client local mode behaviour.
        del store

        reopened_client = QdrantClient(path=storage_path)
        assert reopened_client.count("test").count == 2
        # scroll() returns a tuple; its first element is the list of points.
        points = reopened_client.scroll("test")[0]
        stored_ids = {point.id for point in points}
        assert set(ids) == stored_ids
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
    """Check that ``Qdrant.add_texts`` returns and stores the provided ids.

    Two texts are added with explicit UUID strings to an in-memory collection.
    The returned ids must equal the provided ones, in order and in full, and
    the collection must contain exactly two points carrying those ids.
    """
    from qdrant_client import QdrantClient

    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    client = QdrantClient(":memory:")
    collection_name = "test"
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = vec_store.add_texts(["abc", "def"], ids=ids, batch_size=batch_size)

    # Compare the whole lists. A pairwise zip() check stops at the shorter
    # sequence, so a truncated or empty result would pass unnoticed.
    assert list(returned_ids) == ids
    assert client.count(collection_name).count == 2
    # scroll() returns a tuple; its first element is the list of points.
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that ``Qdrant.from_texts`` stores embeddings under a vector name.

    Five texts are indexed on disk with ``vector_name`` set. A fresh client on
    the same path must count five points, each of which exposes the name in
    its ``vector`` attribute.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Presumably releases the on-disk storage so a second client can open
        # the same path - confirm against qdrant-client local mode behaviour.
        del store

        reopened_client = QdrantClient(path=storage_path)
        assert reopened_client.count("test").count == 5
        # scroll() returns a tuple; its first element is the list of points.
        points = reopened_client.scroll(collection_name, with_vectors=True)[0]
        for point in points:
            assert vector_name in point.vector  # type: ignore[operator]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Check that ``Qdrant.add_texts`` stores embeddings under a vector name.

    An in-memory collection is created with a single named vector. After five
    texts are added, the collection must hold five points, each of which
    exposes the name in its ``vector`` attribute.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"

    client = QdrantClient(":memory:")
    named_params = {
        vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    }
    client.recreate_collection(collection_name, vectors_config=named_params)

    store = Qdrant(
        client,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    store.add_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert client.count("test").count == 5
    # scroll() returns a tuple; its first element is the list of points.
    points = client.scroll(collection_name, with_vectors=True)[0]
    for point in points:
        assert vector_name in point.vector  # type: ignore[operator]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Check that relevance scores stay within the ``[0, 1]`` range.

    Runs for every combination of batch size and payload key names. A score
    above 1 is accepted when ``np.isclose`` considers it equal to 1.
    """
    corpus = ["foo", "bar", "baz"]
    store = Qdrant.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )
    hits = store.similarity_search_with_relevance_scores("foo", k=3)

    for _, score in hits:
        # The upper bound tolerates floating point error through np.isclose.
        assert score <= 1 or np.isclose(score, 1)
        assert score >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_reuses_same_collection(vector_name: Optional[str]) -> None:
    """Check that repeated ``Qdrant.from_texts`` calls append to one collection.

    Five texts and then two more are indexed into the same on-disk collection
    with identical settings. A fresh client must count seven points.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"
    embeddings = ConsistentFakeEmbeddings()
    batches = (
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ["foo", "bar"],
    )
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        for batch in batches:
            store = Qdrant.from_texts(
                batch,
                embeddings,
                collection_name=collection_name,
                path=storage_path,
                vector_name=vector_name,
            )
            # Presumably releases the on-disk storage before the path is
            # opened again - confirm against qdrant-client local mode.
            del store

        reopened_client = QdrantClient(path=storage_path)
        assert reopened_client.count(collection_name).count == 7
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    vector_name: Optional[str],
) -> None:
    """Check that ``Qdrant.from_texts`` rejects a vector size mismatch.

    A collection is created with 10-dimensional embeddings. Indexing into it
    again with 5-dimensional embeddings must raise ``QdrantException``.
    """
    collection_name = "test"
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Presumably releases the on-disk storage before the path is opened
        # again - confirm against qdrant-client local mode behaviour.
        del store

        mismatched_embeddings = ConsistentFakeEmbeddings(dimensionality=5)
        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                mismatched_embeddings,
                collection_name=collection_name,
                path=storage_path,
                vector_name=vector_name,
            )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
def test_qdrant_from_texts_raises_error_on_different_vector_name(
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Check that ``Qdrant.from_texts`` rejects a vector name mismatch.

    A collection is created with ``first_vector_name``. Indexing into it again
    with ``second_vector_name`` must raise ``QdrantException``.

    Both calls use the same embedding dimensionality, so the vector name is
    the only difference between them. This keeps the test from passing on a
    dimensionality mismatch instead of on the name check.
    """
    collection_name = "test"
    dimensionality = 10
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=dimensionality),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=first_vector_name,
        )
        # Presumably releases the on-disk storage before the path is opened
        # again - confirm against qdrant-client local mode behaviour.
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=dimensionality),
                collection_name=collection_name,
                path=str(tmpdir),
                vector_name=second_vector_name,
            )
|
||||
|
||||
|
||||
def test_qdrant_from_texts_raises_error_on_different_distance() -> None:
    """Check that ``Qdrant.from_texts`` rejects a distance function mismatch.

    A collection is created with the "Cosine" distance. Indexing into it again
    with "Euclid" must raise ``QdrantException``.

    Both calls use the same embedding dimensionality, so the distance function
    is the only difference between them. This keeps the test from passing on a
    dimensionality mismatch instead of on the distance check.
    """
    collection_name = "test"
    dimensionality = 10
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=dimensionality),
            collection_name=collection_name,
            path=str(tmpdir),
            distance_func="Cosine",
        )
        # Presumably releases the on-disk storage before the path is opened
        # again - confirm against qdrant-client local mode behaviour.
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=dimensionality),
                collection_name=collection_name,
                path=str(tmpdir),
                distance_func="Euclid",
            )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    vector_name: Optional[str],
) -> None:
    """Check that ``force_recreate=True`` replaces a mismatching collection.

    A collection is created with 10-dimensional embeddings and five texts. It
    is then rebuilt with 5-dimensional embeddings, two texts and
    ``force_recreate=True``. A fresh client must count only the two new points.
    """
    from qdrant_client import QdrantClient

    collection_name = "test"
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)
        original_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
        )
        # Presumably releases the on-disk storage before the path is opened
        # again - confirm against qdrant-client local mode behaviour.
        del original_store

        recreated_store = Qdrant.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            path=storage_path,
            vector_name=vector_name,
            force_recreate=True,
        )
        del recreated_store

        reopened_client = QdrantClient(path=storage_path)
        assert reopened_client.count(collection_name).count == 2
|
Loading…
Reference in New Issue