langchain/tests/integration_tests/vectorstores/test_qdrant.py

"""Test Qdrant functionality."""
from typing import Callable, Optional

import pytest

from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings


@pytest.mark.parametrize(
    "content_payload_key, metadata_payload_key",
    [
        (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),
        ("foo", "bar"),
        (Qdrant.CONTENT_KEY, "bar"),
        ("foo", Qdrant.METADATA_KEY),
    ],
)
def test_qdrant(content_payload_key: str, metadata_payload_key: str) -> None:
    """Build an in-memory store from texts and run a similarity search.

    Parametrized over default and custom payload key names; in every case the
    top hit for the query "foo" must equal ``Document(page_content="foo")``.
    """
    store = Qdrant.from_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
    )

    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert hits == expected


def test_qdrant_add_documents() -> None:
    """Add documents to an already-built store, then search for one of them."""
    store: Qdrant = Qdrant.from_texts(
        ["foo", "bar", "baz"], FakeEmbeddings(), location=":memory:"
    )

    extra_docs = [Document(page_content=text) for text in ("foobar", "foobaz")]
    store.add_documents(extra_docs)

    hits = store.similarity_search("foobar", k=1)

    # FakeEmbeddings return the same query embedding as the first document
    # embedding computed in `embedding.embed_documents`. Since embed_documents
    # is called twice (once by from_texts, once by add_documents), the "foo"
    # embedding is the same as the "foobar" embedding, so either document is
    # an acceptable top hit.
    acceptable = (
        [Document(page_content="foobar")],
        [Document(page_content="foo")],
    )
    assert hits in acceptable


@pytest.mark.parametrize(
    "content_payload_key, metadata_payload_key",
    [
        (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),
        ("test_content", "test_payload"),
        (Qdrant.CONTENT_KEY, "payload_test"),
        ("content_test", Qdrant.METADATA_KEY),
    ],
)
def test_qdrant_with_metadatas(
    content_payload_key: str, metadata_payload_key: str
) -> None:
    """Build a store with per-text metadata and check it comes back on search.

    Each text is stored with ``{"page": <its index>}``; the top hit for "foo"
    must carry ``{"page": 0}`` for every payload key combination.
    """
    texts = ["foo", "bar", "baz"]
    page_metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    store = Qdrant.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=page_metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
    )

    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected


def test_qdrant_similarity_search_filters() -> None:
    """Run a similarity search restricted by a nested metadata filter.

    The filter mixes a top-level key, a nested key and a nested list value;
    only the "bar" document's metadata satisfies all three.
    """
    texts = ["foo", "bar", "baz"]

    metadatas = []
    for idx, _ in enumerate(texts):
        nested = {"page": idx + 1, "pages": [idx + 2, -1]}
        metadatas.append({"page": idx, "metadata": nested})

    store = Qdrant.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )

    # The filter lists only one of the two stored "pages" values (3, not -1).
    query_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    hits = store.similarity_search("foo", k=1, filter=query_filter)

    expected_doc = Document(
        page_content="bar",
        metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
    )
    assert hits == [expected_doc]


@pytest.mark.parametrize(
    "content_payload_key, metadata_payload_key",
    [
        (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),
        ("test_content", "test_payload"),
        (Qdrant.CONTENT_KEY, "payload_test"),
        ("content_test", Qdrant.METADATA_KEY),
    ],
)
def test_qdrant_max_marginal_relevance_search(
    content_payload_key: str, metadata_payload_key: str
) -> None:
    """Build a store and run a max marginal relevance (MMR) search.

    With ``k=2`` and ``fetch_k=3`` the query "foo" must return the "foo"
    document followed by the "bar" document, each with its page metadata.
    """
    texts = ["foo", "bar", "baz"]
    page_metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    store = Qdrant.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=page_metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
    )

    hits = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)

    first = Document(page_content="foo", metadata={"page": 0})
    second = Document(page_content="bar", metadata={"page": 1})
    assert hits == [first, second]


@pytest.mark.parametrize(
    "embeddings, embedding_function",
    [
        (FakeEmbeddings(), None),
        (FakeEmbeddings().embed_query, None),
        (None, FakeEmbeddings().embed_query),
    ],
)
def test_qdrant_embedding_interface(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Construct ``Qdrant`` with exactly one of the two embedding arguments set.

    There is no assertion: the test passes when the constructor does not
    raise for the parametrized pairing.
    """
    # Imported here rather than at module level, as in the original test.
    from qdrant_client import QdrantClient

    Qdrant(
        QdrantClient(":memory:"),
        "test",
        embeddings=embeddings,
        embedding_function=embedding_function,
    )


@pytest.mark.parametrize(
    "embeddings, embedding_function",
    [
        (FakeEmbeddings(), FakeEmbeddings().embed_query),
        (None, None),
    ],
)
def test_qdrant_embedding_interface_raises(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Check that ``Qdrant`` rejects both-set and both-missing embedding args.

    Each parametrized pairing must make the constructor raise ``ValueError``.
    """
    # Imported here rather than at module level, as in the original test.
    from qdrant_client import QdrantClient

    in_memory_client = QdrantClient(":memory:")
    embedding_kwargs = {
        "embeddings": embeddings,
        "embedding_function": embedding_function,
    }

    with pytest.raises(ValueError):
        Qdrant(in_memory_client, "test", **embedding_kwargs)
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`"""Test Qdrant functionality."""`
Update qdrant interface (#3971) Hello 1) Passing `embedding_function` as a callable seems to be outdated and the common interface is to pass `Embeddings` instance 2) At the moment `Qdrant.add_texts` is designed to be used with `embeddings.embed_query`, which is 1) slow 2) causes ambiguity due to 1. It should be used with `embeddings.embed_documents` This PR solves both problems and also provides some new tests 2023-05-05 23:46:40 +00:00			`from typing import Callable, Optional`

Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`import pytest`

Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`from langchain.docstore.document import Document`
Update qdrant interface (#3971) Hello 1) Passing `embedding_function` as a callable seems to be outdated and the common interface is to pass `Embeddings` instance 2) At the moment `Qdrant.add_texts` is designed to be used with `embeddings.embed_query`, which is 1) slow 2) causes ambiguity due to 1. It should be used with `embeddings.embed_documents` This PR solves both problems and also provides some new tests 2023-05-05 23:46:40 +00:00			`from langchain.embeddings.base import Embeddings`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`from langchain.vectorstores import Qdrant`
Harrison/milvus (#856) Signed-off-by: Filip Haltmayer <filip.haltmayer@zilliz.com> Signed-off-by: Frank Liu <frank.liu@zilliz.com> Co-authored-by: Filip Haltmayer <81822489+filip-halt@users.noreply.github.com> Co-authored-by: Frank Liu <frank@frankzliu.com> 2023-02-03 06:05:47 +00:00			`from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00

Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`@pytest.mark.parametrize(`
			`["content_payload_key", "metadata_payload_key"],`
			`[`
			`(Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),`
			`("foo", "bar"),`
			`(Qdrant.CONTENT_KEY, "bar"),`
			`("foo", Qdrant.METADATA_KEY),`
			`],`
			`)`
			`def test_qdrant(content_payload_key: str, metadata_payload_key: str) -> None:`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`docsearch = Qdrant.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
Qdrant update to 1.1.1 & docs polishing (#2388) This PR updates Qdrant to 1.1.1 and introduces local mode, so there is no need to spin up the Qdrant server. By that occasion, the Qdrant example notebooks also got updated, covering more cases and answering some commonly asked questions. All the Qdrant's integration tests were switched to local mode, so no Docker container is required to launch them. 2023-04-04 13:48:21 +00:00			`location=":memory:",`
Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`content_payload_key=content_payload_key,`
			`metadata_payload_key=metadata_payload_key,`
			`)`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`output = docsearch.similarity_search("foo", k=1)`
			`assert output == [Document(page_content="foo")]`


Update qdrant interface (#3971) Hello 1) Passing `embedding_function` as a callable seems to be outdated and the common interface is to pass `Embeddings` instance 2) At the moment `Qdrant.add_texts` is designed to be used with `embeddings.embed_query`, which is 1) slow 2) causes ambiguity due to 1. It should be used with `embeddings.embed_documents` This PR solves both problems and also provides some new tests 2023-05-05 23:46:40 +00:00			`def test_qdrant_add_documents() -> None:`
			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
			`docsearch: Qdrant = Qdrant.from_texts(texts, FakeEmbeddings(), location=":memory:")`

			`new_texts = ["foobar", "foobaz"]`
			`docsearch.add_documents([Document(page_content=content) for content in new_texts])`
			`output = docsearch.similarity_search("foobar", k=1)`
			`# FakeEmbeddings return the same query embedding as the first document embedding`
			# computed in `embedding.embed_documents`. Since embed_documents is called twice,
			`# "foo" embedding is the same as "foobar" embedding`
			`assert output == [Document(page_content="foobar")] or output == [`
			`Document(page_content="foo")`
			`]`


Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`@pytest.mark.parametrize(`
			`["content_payload_key", "metadata_payload_key"],`
			`[`
			`(Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),`
			`("test_content", "test_payload"),`
			`(Qdrant.CONTENT_KEY, "payload_test"),`
			`("content_test", Qdrant.METADATA_KEY),`
			`],`
			`)`
			`def test_qdrant_with_metadatas(`
			`content_payload_key: str, metadata_payload_key: str`
			`) -> None:`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`docsearch = Qdrant.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`metadatas=metadatas,`
Qdrant update to 1.1.1 & docs polishing (#2388) This PR updates Qdrant to 1.1.1 and introduces local mode, so there is no need to spin up the Qdrant server. By that occasion, the Qdrant example notebooks also got updated, covering more cases and answering some commonly asked questions. All the Qdrant's integration tests were switched to local mode, so no Docker container is required to launch them. 2023-04-04 13:48:21 +00:00			`location=":memory:",`
Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`content_payload_key=content_payload_key,`
			`metadata_payload_key=metadata_payload_key,`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`)`
			`output = docsearch.similarity_search("foo", k=1)`
			`assert output == [Document(page_content="foo", metadata={"page": 0})]`


Implement basic metadata filtering in Qdrant (#1689) This PR implements a basic metadata filtering mechanism similar to the ones in Chroma and Pinecone. It still cannot express complex conditions, as there are no operators, but some users requested to have that feature available. 2023-03-15 14:31:39 +00:00			`def test_qdrant_similarity_search_filters() -> None:`
			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
Add support for Qdrant nested filter (#4354) # Add support for Qdrant nested filter This extends the filter functionality for the Qdrant vectorstore. The current filter implementation is limited to a single-level metadata structure; however, Qdrant supports nested metadata filtering. This extends the functionality for users to maximize the filter functionality when using Qdrant as the vectorstore. Reference: https://qdrant.tech/documentation/filtering/#nested-key --------- Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com> 2023-05-09 17:34:11 +00:00			`metadatas = [`
			`{"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}`
			`for i in range(len(texts))`
			`]`
Implement basic metadata filtering in Qdrant (#1689) This PR implements a basic metadata filtering mechanism similar to the ones in Chroma and Pinecone. It still cannot express complex conditions, as there are no operators, but some users requested to have that feature available. 2023-03-15 14:31:39 +00:00			`docsearch = Qdrant.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`metadatas=metadatas,`
Qdrant update to 1.1.1 & docs polishing (#2388) This PR updates Qdrant to 1.1.1 and introduces local mode, so there is no need to spin up the Qdrant server. By that occasion, the Qdrant example notebooks also got updated, covering more cases and answering some commonly asked questions. All the Qdrant's integration tests were switched to local mode, so no Docker container is required to launch them. 2023-04-04 13:48:21 +00:00			`location=":memory:",`
Implement basic metadata filtering in Qdrant (#1689) This PR implements a basic metadata filtering mechanism similar to the ones in Chroma and Pinecone. It still cannot express complex conditions, as there are no operators, but some users requested to have that feature available. 2023-03-15 14:31:39 +00:00			`)`
Add support for Qdrant nested filter (#4354) # Add support for Qdrant nested filter This extends the filter functionality for the Qdrant vectorstore. The current filter implementation is limited to a single-level metadata structure; however, Qdrant supports nested metadata filtering. This extends the functionality for users to maximize the filter functionality when using Qdrant as the vectorstore. Reference: https://qdrant.tech/documentation/filtering/#nested-key --------- Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com> 2023-05-09 17:34:11 +00:00
			`output = docsearch.similarity_search(`
			`"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}`
			`)`
			`assert output == [`
			`Document(`
			`page_content="bar",`
			`metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},`
			`)`
			`]`
Implement basic metadata filtering in Qdrant (#1689) This PR implements a basic metadata filtering mechanism similar to the ones in Chroma and Pinecone. It still cannot express complex conditions, as there are no operators, but some users requested to have that feature available. 2023-03-15 14:31:39 +00:00

Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`@pytest.mark.parametrize(`
			`["content_payload_key", "metadata_payload_key"],`
			`[`
			`(Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY),`
			`("test_content", "test_payload"),`
			`(Qdrant.CONTENT_KEY, "payload_test"),`
			`("content_test", Qdrant.METADATA_KEY),`
			`],`
			`)`
			`def test_qdrant_max_marginal_relevance_search(`
			`content_payload_key: str, metadata_payload_key: str`
			`) -> None:`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`"""Test end to end construction and MRR search."""`
			`texts = ["foo", "bar", "baz"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`docsearch = Qdrant.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`metadatas=metadatas,`
Qdrant update to 1.1.1 & docs polishing (#2388) This PR updates Qdrant to 1.1.1 and introduces local mode, so there is no need to spin up the Qdrant server. By that occasion, the Qdrant example notebooks also got updated, covering more cases and answering some commonly asked questions. All the Qdrant's integration tests were switched to local mode, so no Docker container is required to launch them. 2023-04-04 13:48:21 +00:00			`location=":memory:",`
Add Qdrant named arguments (#1386) This PR: - Increases `qdrant-client` version to 1.0.4 - Introduces custom content and metadata keys (as requested in #1087) - Moves all the `QdrantClient` parameters into the method parameters to simplify code completion 2023-03-02 15:05:14 +00:00			`content_payload_key=content_payload_key,`
			`metadata_payload_key=metadata_payload_key,`
Harrison/quadrant (#665) Co-authored-by: Kacper Łukawski <kacperlukawski@users.noreply.github.com> 2023-01-20 17:45:01 +00:00			`)`
			`output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)`
			`assert output == [`
			`Document(page_content="foo", metadata={"page": 0}),`
			`Document(page_content="bar", metadata={"page": 1}),`
			`]`
Update qdrant interface (#3971) Hello 1) Passing `embedding_function` as a callable seems to be outdated and the common interface is to pass `Embeddings` instance 2) At the moment `Qdrant.add_texts` is designed to be used with `embeddings.embed_query`, which is 1) slow 2) causes ambiguity due to 1. It should be used with `embeddings.embed_documents` This PR solves both problems and also provides some new tests 2023-05-05 23:46:40 +00:00

			`@pytest.mark.parametrize(`
			`["embeddings", "embedding_function"],`
			`[`
			`(FakeEmbeddings(), None),`
			`(FakeEmbeddings().embed_query, None),`
			`(None, FakeEmbeddings().embed_query),`
			`],`
			`)`
			`def test_qdrant_embedding_interface(`
			`embeddings: Optional[Embeddings], embedding_function: Optional[Callable]`
			`) -> None:`
			`from qdrant_client import QdrantClient`

			`client = QdrantClient(":memory:")`
			`collection_name = "test"`

			`Qdrant(`
			`client,`
			`collection_name,`
			`embeddings=embeddings,`
			`embedding_function=embedding_function,`
			`)`


			`@pytest.mark.parametrize(`
			`["embeddings", "embedding_function"],`
			`[`
			`(FakeEmbeddings(), FakeEmbeddings().embed_query),`
			`(None, None),`
			`],`
			`)`
			`def test_qdrant_embedding_interface_raises(`
			`embeddings: Optional[Embeddings], embedding_function: Optional[Callable]`
			`) -> None:`
			`from qdrant_client import QdrantClient`

			`client = QdrantClient(":memory:")`
			`collection_name = "test"`

			`with pytest.raises(ValueError):`
			`Qdrant(`
			`client,`
			`collection_name,`
			`embeddings=embeddings,`
			`embedding_function=embedding_function,`
			`)`