mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
qdrant: test new QdrantVectorStore (#24165)
## Description

This PR adds integration tests to follow up on #24164.

By default, the tests use an in-memory instance. To run the full suite of tests against both the in-memory instance and a Qdrant server:

```
$ docker run -p 6333:6333 qdrant/qdrant
$ make test
$ make integration_test
```

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
f071581aea
commit
a653b209ba
@ -4,6 +4,8 @@ import requests # type: ignore
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
|
||||
from langchain_qdrant import SparseEmbeddings, SparseVector
|
||||
|
||||
|
||||
def qdrant_running_locally() -> bool:
|
||||
"""Check if Qdrant is running at http://localhost:6333."""
|
||||
@ -55,3 +57,29 @@ class ConsistentFakeEmbeddings(Embeddings):
|
||||
"""Return consistent embeddings for the text, if seen before, or a constant
|
||||
one if the text is unknown."""
|
||||
return self.embed_documents([text])[0]
|
||||
|
||||
|
||||
class ConsistentFakeSparseEmbeddings(SparseEmbeddings):
    """Fake sparse embeddings that remember every text seen so far.

    Each distinct text is assigned the position at which it was first seen,
    and that position fully determines its vector, so the same text always
    yields the same sparse vector for the lifetime of the instance.
    """

    def __init__(self, dimensionality: int = 25) -> None:
        """Create the fake embedder.

        Args:
            dimensionality: Number of entries in every produced sparse vector.
                Expected to be at least 1.
        """
        self.known_texts: List[str] = []
        # Honour the constructor argument (it used to be ignored in favour of
        # a hard-coded 25).
        self.dimensionality = dimensionality

    def embed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Return a consistent sparse vector for each text.

        Texts not seen before are appended to ``known_texts`` first.

        Args:
            texts: Texts to embed.

        Returns:
            One ``SparseVector`` per input text, in input order.
        """
        out_vectors = []
        for text in texts:
            if text not in self.known_texts:
                self.known_texts.append(text)
            index = self.known_texts.index(text)
            # Indices form a window shifted by the text's position; the last
            # value carries the position itself so vectors differ per text.
            indices = [i + index for i in range(self.dimensionality)]
            values = [1.0] * (self.dimensionality - 1) + [float(index)]
            out_vectors.append(SparseVector(indices=indices, values=values))
        return out_vectors

    def embed_query(self, text: str) -> SparseVector:
        """Return the sparse vector for a single text.

        Delegates to ``embed_documents``, so an unseen text is registered and
        receives its own vector rather than a shared constant one.
        """
        return self.embed_documents([text])[0]
|
||||
|
@ -5,8 +5,8 @@ from qdrant_client import QdrantClient
|
||||
from tests.integration_tests.fixtures import qdrant_locations
|
||||
|
||||
|
||||
def pytest_sessionfinish() -> None:
|
||||
"""Clean up all collections after the test session."""
|
||||
def pytest_runtest_teardown() -> None:
|
||||
"""Clean up all collections after the each test."""
|
||||
for location in qdrant_locations():
|
||||
client = QdrantClient(location=location, api_key=os.getenv("QDRANT_API_KEY"))
|
||||
collections = client.get_collections().collections
|
||||
|
@ -2,6 +2,7 @@ import logging
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
from langchain_qdrant.qdrant import RetrievalMode
|
||||
from tests.integration_tests.common import qdrant_running_locally
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -23,3 +24,20 @@ def qdrant_locations(use_in_memory: bool = True) -> List[str]:
|
||||
locations.append(qdrant_url)
|
||||
|
||||
return locations
|
||||
|
||||
|
||||
def retrieval_modes(
    *, dense: bool = True, sparse: bool = True, hybrid: bool = True
) -> List[RetrievalMode]:
    """List the retrieval modes whose flag is enabled.

    Args:
        dense: Include ``RetrievalMode.DENSE``.
        sparse: Include ``RetrievalMode.SPARSE``.
        hybrid: Include ``RetrievalMode.HYBRID``.

    Returns:
        The selected modes, always ordered dense, sparse, hybrid.
    """
    toggles = (
        (dense, RetrievalMode.DENSE),
        (sparse, RetrievalMode.SPARSE),
        (hybrid, RetrievalMode.HYBRID),
    )
    return [mode for enabled, mode in toggles if enabled]
|
||||
|
@ -0,0 +1,143 @@
|
||||
import uuid
|
||||
from typing import List, Union
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from qdrant_client import QdrantClient, models
|
||||
|
||||
from langchain_qdrant import QdrantVectorStore, RetrievalMode
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
ConsistentFakeSparseEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_add_documents_extends_existing_collection(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
) -> None:
    """Add documents to a store built by from_texts and search for a new one."""
    store = QdrantVectorStore.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # Extend the collection with two more documents after construction.
    extra_documents = [
        Document(page_content=content) for content in ["foobar", "foobaz"]
    ]
    store.add_documents(extra_documents)

    # The top hit for a newly added text must be that text itself.
    hits = store.similarity_search("foobar", k=1)
    assert_documents_equals(hits, [Document(page_content="foobar")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_returns_all_ids(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
    batch_size: int,
) -> None:
    """Check add_texts returns one distinct, retrievable id per added text."""
    store = QdrantVectorStore.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        batch_size=batch_size,
    )

    new_ids = store.add_texts(["foo", "bar", "baz"])

    assert len(new_ids) == 3
    # No id may be handed out twice.
    assert len(set(new_ids)) == 3
    assert len(store.get_by_ids(new_ids)) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_qdrant_add_texts_stores_duplicated_texts(
    location: str,
    vector_name: str,
) -> None:
    """Check add_texts stores the same text twice as two separate points."""
    collection_name = uuid.uuid4().hex
    client = QdrantClient(location)

    # Create the collection by hand so the store attaches to an existing one.
    dense_params = models.VectorParams(size=10, distance=models.Distance.COSINE)
    client.recreate_collection(
        collection_name, vectors_config={vector_name: dense_params}
    )

    store = QdrantVectorStore(
        client,
        collection_name,
        embedding=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    returned_ids = store.add_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert len(set(returned_ids)) == 2
    assert client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_ids(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
    batch_size: int,
) -> None:
    """Check that caller-provided ids (UUID string and integers) are stored."""
    collection_name = uuid.uuid4().hex
    ids: List[Union[str, int]] = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        432,
        432145435,
    ]

    store = QdrantVectorStore.from_texts(
        ["abc", "def", "ghi"],
        ConsistentFakeEmbeddings(),
        ids=ids,
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        batch_size=batch_size,
    )

    assert store.client.count(collection_name).count == 3

    # scroll() returns a tuple whose first element holds the points.
    points = store.client.scroll(collection_name)[0]
    stored_ids = {point.id for point in points}
    assert set(ids) == stored_ids

    assert len(store.get_by_ids(ids)) == 3
|
@ -0,0 +1,51 @@
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_qdrant.qdrant import QdrantVectorStore, RetrievalMode
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
ConsistentFakeSparseEmbeddings,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_from_existing_collection_uses_same_collection(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
) -> None:
    """Check from_existing_collection attaches to the already populated collection."""
    collection_name = uuid.uuid4().hex

    # Seed the collection with a single text.
    QdrantVectorStore.from_texts(
        ["foo"],
        embedding=ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    reopened = QdrantVectorStore.from_existing_collection(
        collection_name,
        embedding=ConsistentFakeEmbeddings(),
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )
    reopened.add_texts(["baz", "bar"])

    # One seeded text plus the two added through the reopened store.
    assert reopened.client.count(collection_name).count == 3
|
@ -0,0 +1,385 @@
|
||||
import uuid
|
||||
from typing import List, Union
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from qdrant_client import models
|
||||
|
||||
from langchain_qdrant import QdrantVectorStore, RetrievalMode
|
||||
from langchain_qdrant.qdrant import QdrantVectorStoreError
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
ConsistentFakeSparseEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
def test_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) -> None:
    """Check from_texts stores one point per input text."""
    collection_name = uuid.uuid4().hex
    texts = ["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]

    store = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=location,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    point_count = store.client.count(collection_name).count
    assert point_count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
def test_qdrant_from_texts_stores_ids(
    batch_size: int,
    vector_name: str,
    sparse_vector_name: str,
    location: str,
    retrieval_mode: RetrievalMode,
) -> None:
    """Check from_texts stores points under the ids supplied by the caller."""
    collection_name = uuid.uuid4().hex
    ids: List[Union[str, int]] = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        786,
    ]

    store = QdrantVectorStore.from_texts(
        ["abc", "def"],
        ConsistentFakeEmbeddings(),
        ids=ids,
        collection_name=collection_name,
        location=location,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        batch_size=batch_size,
        vector_name=vector_name,
        sparse_vector_name=sparse_vector_name,
    )

    assert store.client.count(collection_name).count == 2

    # Fetch the points back by the very ids that were supplied.
    retrieved = store.client.retrieve(collection_name, ids)
    assert set(ids) == {point.id for point in retrieved}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
    location: str,
    retrieval_mode: RetrievalMode,
    vector_name: str,
    sparse_vector_name: str,
) -> None:
    """Check from_texts stores vectors under the requested vector names."""
    collection_name = uuid.uuid4().hex
    store = QdrantVectorStore.from_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    assert store.client.count(collection_name).count == 5

    # Dense and hybrid modes: every point carries the dense vector name. A
    # plain list is accepted too (presumably how an unnamed vector is returned).
    if retrieval_mode in retrieval_modes(sparse=False):
        for point in store.client.scroll(collection_name, with_vectors=True)[0]:
            assert vector_name in point.vector or isinstance(  # type: ignore
                point.vector, list
            )

    # Sparse and hybrid modes: every point carries the sparse vector name.
    if retrieval_mode in retrieval_modes(dense=False):
        for point in store.client.scroll(collection_name, with_vectors=True)[0]:
            assert sparse_vector_name in point.vector  # type: ignore
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_from_texts_reuses_same_collection(
    location: str,
    retrieval_mode: RetrievalMode,
    vector_name: str,
    sparse_vector_name: str,
) -> None:
    """Check a second from_texts call adds to the existing collection."""
    collection_name = uuid.uuid4().hex
    dense = ConsistentFakeEmbeddings()
    sparse = ConsistentFakeSparseEmbeddings()

    first_store = QdrantVectorStore.from_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        dense,
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=sparse,
    )
    # Drop the first store before building the second one.
    del first_store

    second_store = QdrantVectorStore.from_texts(
        ["foo", "bar"],
        dense,
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=sparse,
    )

    # Five texts from the first call plus two from the second.
    assert second_store.client.count(collection_name).count == 7
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(sparse=False))
def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
) -> None:
    """Test if Qdrant.from_texts raises an exception if dimensionality does not match"""
    collection_name = uuid.uuid4().hex

    # Create the collection with 10-dimensional dense vectors.
    QdrantVectorStore.from_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # Reusing it with 5-dimensional embeddings must be rejected.
    with pytest.raises(QdrantVectorStoreError) as excinfo:
        QdrantVectorStore.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            location=location,
            vector_name=vector_name,
            retrieval_mode=retrieval_mode,
            sparse_embedding=ConsistentFakeSparseEmbeddings(),
        )

    # Parenthesised so both literals are concatenated into one message; the
    # second literal used to be a stand-alone no-op statement, so the
    # dimension numbers were never checked.
    expected_message = (
        "collection is configured for dense vectors "
        "with 10 dimensions. Selected embeddings are 5-dimensional"
    )
    assert expected_message in str(excinfo.value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        ("", "custom-vector"),
        ("custom-vector", ""),
        ("my-first-vector", "my-second_vector"),
    ],
)
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(sparse=False))
def test_qdrant_from_texts_raises_error_on_different_vector_name(
    location: str,
    first_vector_name: str,
    second_vector_name: str,
    retrieval_mode: RetrievalMode,
) -> None:
    """Check from_texts rejects a collection created under another vector name."""
    collection_name = uuid.uuid4().hex

    def build(texts: List[str], dense_name: str) -> None:
        # Both calls differ only in the texts and the dense vector name.
        QdrantVectorStore.from_texts(
            texts,
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            location=location,
            vector_name=dense_name,
            retrieval_mode=retrieval_mode,
            sparse_embedding=ConsistentFakeSparseEmbeddings(),
        )

    build(["lorem", "ipsum", "dolor", "sit", "amet"], first_vector_name)

    with pytest.raises(QdrantVectorStoreError) as excinfo:
        build(["foo", "bar"], second_vector_name)

    assert "does not contain dense vector named" in str(excinfo.value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(sparse=False))
def test_qdrant_from_texts_raises_error_on_different_distance(
    location: str, vector_name: str, retrieval_mode: RetrievalMode
) -> None:
    """Check from_texts rejects a collection configured with another distance."""
    collection_name = uuid.uuid4().hex

    # The collection is first created with cosine distance ...
    QdrantVectorStore.from_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        distance=models.Distance.COSINE,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # ... so requesting Euclidean distance for it must fail.
    with pytest.raises(QdrantVectorStoreError) as excinfo:
        QdrantVectorStore.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            location=location,
            vector_name=vector_name,
            distance=models.Distance.EUCLID,
            retrieval_mode=retrieval_mode,
            sparse_embedding=ConsistentFakeSparseEmbeddings(),
        )

    error_text = str(excinfo.value)
    assert "configured for COSINE similarity, but requested EUCLID" in error_text
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
) -> None:
    """Check force_recreate=True replaces a collection of another dimensionality."""
    collection_name = uuid.uuid4().hex

    # Settings shared by both from_texts calls.
    shared_options = {
        "collection_name": collection_name,
        "location": location,
        "vector_name": vector_name,
        "retrieval_mode": retrieval_mode,
        "sparse_vector_name": sparse_vector_name,
    }

    original_store = QdrantVectorStore.from_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        **shared_options,
    )

    # Different dense dimensionality, accepted only because of force_recreate.
    original_store = QdrantVectorStore.from_texts(
        ["foo", "bar"],
        ConsistentFakeEmbeddings(dimensionality=5),
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        force_recreate=True,
        **shared_options,
    )

    # Only the two texts from the second call remain.
    assert original_store.client.count(collection_name).count == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
)
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_qdrant_from_texts_stores_metadatas(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
) -> None:
    """Check metadata given to from_texts comes back on the search result."""
    texts = ["fabrin", "barizda"]
    # Each text is tagged with its position as the page number.
    page_metadata = [{"page": position} for position, _ in enumerate(texts)]

    store = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadata,
        location=location,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    hits = store.similarity_search("fabrin", k=1)
    expected = [Document(page_content="fabrin", metadata={"page": 0})]
    assert_documents_equals(hits, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(sparse=False))
@pytest.mark.parametrize(
    "sparse_vector_name", ["my-sparse-vector", "another-sparse-vector"]
)
def test_from_texts_passed_optimizers_config_and_on_disk_payload(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    sparse_vector_name: str,
) -> None:
    """Check collection and vector options given to from_texts are applied."""
    collection_name = uuid.uuid4().hex
    texts = ["foo", "bar", "baz"]
    page_metadata = [{"page": position} for position, _ in enumerate(texts)]

    create_options = {
        "on_disk_payload": True,
        "optimizers_config": models.OptimizersConfigDiff(memmap_threshold=1000),
    }
    store = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadata,
        collection_create_options=create_options,
        vector_params={"on_disk": True},
        collection_name=collection_name,
        location=location,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_vector_name=sparse_vector_name,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # Read the configuration back from the created collection.
    config = store.client.get_collection(collection_name).config
    assert config.params.vectors[vector_name].on_disk is True  # type: ignore
    assert config.optimizer_config.memmap_threshold == 1000
    assert config.params.on_disk_payload is True
|
@ -0,0 +1,116 @@
|
||||
import pytest # type: ignore[import-not-found]
|
||||
from langchain_core.documents import Document
|
||||
from qdrant_client import models
|
||||
|
||||
from langchain_qdrant import QdrantVectorStore, RetrievalMode
|
||||
from langchain_qdrant.qdrant import QdrantVectorStoreError
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
ConsistentFakeSparseEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
|
||||
|
||||
|
||||
# MMR is supported when dense embeddings are available
|
||||
# i.e. In Dense and Hybrid retrieval modes
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize(
    "content_payload_key", [QdrantVectorStore.CONTENT_KEY, "test_content"]
)
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "test_metadata"]
)
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(sparse=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_qdrant_mmr_search(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    retrieval_mode: RetrievalMode,
    vector_name: str,
) -> None:
    """Test end to end construction and MMR search, with and without a filter."""
    texts = ["foo", "bar", "baz"]
    page_metadata = [{"page": position} for position, _ in enumerate(texts)]
    store = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadata,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        location=location,
        retrieval_mode=retrieval_mode,
        vector_name=vector_name,
        distance=models.Distance.EUCLID,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # Unfiltered search.
    unfiltered = store.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0
    )
    assert_documents_equals(
        unfiltered,
        [
            Document(page_content="foo", metadata={"page": 0}),
            Document(page_content="baz", metadata={"page": 2}),
        ],
    )

    # Restrict the search to points whose metadata page equals 2.
    page_condition = models.FieldCondition(
        key=f"{metadata_payload_key}.page",
        match=models.MatchValue(value=2),
    )
    page_two_only = models.Filter(must=[page_condition])

    filtered = store.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0, filter=page_two_only
    )
    assert_documents_equals(
        filtered,
        [Document(page_content="baz", metadata={"page": 2})],
    )
|
||||
|
||||
|
||||
# MMR shouldn't work with only sparse retrieval mode
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize(
    "content_payload_key", [QdrantVectorStore.CONTENT_KEY, "test_content"]
)
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "test_metadata"]
)
@pytest.mark.parametrize("retrieval_mode", retrieval_modes(dense=False, hybrid=False))
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_invalid_qdrant_mmr_with_sparse(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    retrieval_mode: RetrievalMode,
    vector_name: str,
) -> None:
    """Check MMR search raises when the store was built in sparse-only mode."""
    texts = ["foo", "bar", "baz"]
    page_metadata = [{"page": position} for position, _ in enumerate(texts)]
    store = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=page_metadata,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        location=location,
        retrieval_mode=retrieval_mode,
        vector_name=vector_name,
        distance=models.Distance.EUCLID,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    with pytest.raises(QdrantVectorStoreError) as excinfo:
        store.max_marginal_relevance_search("foo", k=2, fetch_k=3, lambda_mult=0.0)

    # The error must point at the missing dense vector.
    assert "does not contain dense vector named" in str(excinfo.value)
|
@ -0,0 +1,278 @@
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from qdrant_client import models
|
||||
|
||||
from langchain_qdrant import QdrantVectorStore, RetrievalMode
|
||||
from tests.integration_tests.common import (
|
||||
ConsistentFakeEmbeddings,
|
||||
ConsistentFakeSparseEmbeddings,
|
||||
assert_documents_equals,
|
||||
)
|
||||
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
@pytest.mark.parametrize("batch_size", [1, 64])
def test_similarity_search(
    location: str,
    vector_name: str,
    retrieval_mode: RetrievalMode,
    batch_size: int,
) -> None:
    """Build a store from texts and check the top hit for a stored text."""
    store = QdrantVectorStore.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=location,
        batch_size=batch_size,
        vector_name=vector_name,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    hits = store.similarity_search("foo", k=1)
    wanted = [Document(page_content="foo")]
    assert_documents_equals(actual=hits, expected=wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
)
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
@pytest.mark.parametrize("batch_size", [1, 64])
def test_similarity_search_by_vector(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: str,
    batch_size: int,
) -> None:
    """Build a store from texts and search it with a precomputed query vector."""
    store = QdrantVectorStore.from_texts(
        ["foo", "bar", "baz"],
        ConsistentFakeEmbeddings(),
        location=location,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )

    # Embed the query with a separate, fresh embedder instance.
    query_vector = ConsistentFakeEmbeddings().embed_query("foo")
    hits = store.similarity_search_by_vector(query_vector, k=1)

    assert_documents_equals(hits, [Document(page_content="foo")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
)
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
def test_similarity_search_filters(
    location: str,
    metadata_payload_key: str,
    retrieval_mode: RetrievalMode,
) -> None:
    """Check that a metadata filter restricts the search result.

    Each of the three texts is stored with a ``page`` number equal to its
    position. The query is "foo", but the filter only admits ``page == 1``,
    so the expected single hit is "bar" together with its full metadata.
    """
    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]

    store = QdrantVectorStore.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=corpus_metadata,
        location=location,
        metadata_payload_key=metadata_payload_key,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # The filter key is prefixed with whichever payload key holds the metadata.
    page_is_one = models.FieldCondition(
        key=f"{metadata_payload_key}.page",
        match=models.MatchValue(value=1),
    )
    only_page_one = models.Filter(must=[page_is_one])

    hits = store.similarity_search("foo", k=1, filter=only_page_one)

    expected_doc = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(actual=hits, expected=[expected_doc])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_similarity_relevance_search_no_threshold(
    location: str,
    vector_name: str,
) -> None:
    """Check relevance scores when no score threshold is applied.

    With ``score_threshold=None`` and ``k=3`` all three stored documents must
    be returned, and every relevance score, rounded to two decimals, must lie
    in the closed interval [0, 1].
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = QdrantVectorStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=location,
        vector_name=vector_name,
    )

    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )

    assert len(output) == 3
    # Unpack the (document, score) pairs directly instead of indexing by
    # position, as the sibling threshold tests do. The score is rounded
    # before the bounds check, exactly as before.
    for _, score in output:
        assert 0 <= round(score, 2) <= 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_relevance_search_with_threshold(
    location: str,
    vector_name: str,
) -> None:
    """Check that a high score threshold keeps only one result.

    Three documents are stored and "foo" is queried with ``k=3`` and a
    threshold of 0.99. Exactly one hit is expected, and its score must not
    fall below the threshold.
    """
    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = QdrantVectorStore.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=corpus_metadata,
        location=location,
        vector_name=vector_name,
    )

    score_threshold = 0.99
    scored_hits = store.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=score_threshold
    )

    assert len(scored_hits) == 1
    assert all(score >= score_threshold for _, score in scored_hits)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
)
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
def test_relevance_search_with_threshold_and_filter(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: str,
) -> None:
    """Check that a score threshold and a metadata filter combine.

    "foo" is queried with a 0.99 threshold twice: once filtered to
    ``page == 1`` (expected to yield nothing) and once filtered to
    ``page == 0`` (expected to yield exactly one hit at or above the
    threshold).
    """

    def page_filter(page: int) -> models.Filter:
        # Admit only points whose stored metadata has the given page number.
        return models.Filter(
            must=[
                models.FieldCondition(
                    key=f"{metadata_payload_key}.page",
                    match=models.MatchValue(value=page),
                )
            ]
        )

    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "metadata": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = QdrantVectorStore.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        metadatas=corpus_metadata,
        location=location,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        vector_name=vector_name,
    )
    score_threshold = 0.99  # for almost exact match

    # Page 1 belongs to "bar" (see corpus_metadata), so no hit is expected.
    rejected = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=page_filter(1), score_threshold=score_threshold
    )
    assert len(rejected) == 0

    # Page 0 belongs to "foo" itself, so exactly one hit is expected.
    accepted = store.similarity_search_with_relevance_scores(
        "foo", k=3, filter=page_filter(0), score_threshold=score_threshold
    )
    assert len(accepted) == 1
    assert all(score >= score_threshold for _, score in accepted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
@pytest.mark.parametrize(
    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
)
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
def test_similarity_search_filters_with_qdrant_filters(
    location: str,
    content_payload_key: str,
    metadata_payload_key: str,
    retrieval_mode: RetrievalMode,
) -> None:
    """Check a filter on the content key and on nested metadata keys.

    The filter requires the content to equal "bar", the top-level ``page`` to
    be 1, the nested ``details.page`` to be 2 and the nested ``details.pages``
    list to contain 3. Querying "foo" with ``k=1`` must therefore return the
    "bar" document with its full metadata.
    """
    corpus = ["foo", "bar", "baz"]
    corpus_metadata = [
        {"page": idx, "details": {"page": idx + 1, "pages": [idx + 2, -1]}}
        for idx, _ in enumerate(corpus)
    ]
    store = QdrantVectorStore.from_texts(
        corpus,
        ConsistentFakeEmbeddings(),
        location=location,
        metadatas=corpus_metadata,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

    # One condition per payload field; all of them must hold at once.
    content_is_bar = models.FieldCondition(
        key=content_payload_key,
        match=models.MatchValue(value="bar"),
    )
    page_is_one = models.FieldCondition(
        key=f"{metadata_payload_key}.page",
        match=models.MatchValue(value=1),
    )
    nested_page_is_two = models.FieldCondition(
        key=f"{metadata_payload_key}.details.page",
        match=models.MatchValue(value=2),
    )
    nested_pages_has_three = models.FieldCondition(
        key=f"{metadata_payload_key}.details.pages",
        match=models.MatchAny(any=[3]),
    )
    combined_filter = models.Filter(
        must=[
            content_is_bar,
            page_is_one,
            nested_page_is_two,
            nested_pages_has_three,
        ]
    )

    hits = store.similarity_search("foo", k=1, filter=combined_filter)

    expected_doc = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert_documents_equals(actual=hits, expected=[expected_doc])
|
Loading…
Reference in New Issue
Block a user