# langchain/tests/integration_tests/vectorstores/test_mongodb_atlas.py

"""Test MongoDB Atlas Vector Search functionality."""
from __future__ import annotations

import os
from time import sleep
from typing import TYPE_CHECKING

import pytest

from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch

if TYPE_CHECKING:
    from pymongo import MongoClient

INDEX_NAME = "langchain-test-index"
NAMESPACE = "langchain_test_db.langchain_test_collection"
# Read from the environment; this is None when MONGODB_ATLAS_URI is unset.
CONNECTION_STRING = os.environ.get("MONGODB_ATLAS_URI")
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")

# ``MongoClient`` is imported at the top of this file only under TYPE_CHECKING, so
# that name does not exist at runtime and calling it raised NameError on import.
# Load pymongo for real here; importorskip skips this whole module when pymongo
# is not installed instead of erroring during collection.
pymongo = pytest.importorskip("pymongo")

# Instantiate as constant instead of pytest fixture to prevent needing to make multiple
# connections.
TEST_CLIENT: MongoClient = pymongo.MongoClient(CONNECTION_STRING)
collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]


class TestMongoDBAtlasVectorSearch:
    """Build a MongoDBAtlasVectorSearch store in several ways and query it."""

    @classmethod
    def setup_class(cls) -> None:
        """Abort the class run unless the test collection starts out empty."""
        existing = collection.count_documents({})  # type: ignore[index]
        assert existing == 0

    @classmethod
    def teardown_class(cls) -> None:
        """Remove every document from the collection once all tests are done."""
        collection.delete_many({})  # type: ignore[index]

    @pytest.fixture(autouse=True)
    def setup(self) -> None:
        """Clear the collection before each test (autouse fixture)."""
        collection.delete_many({})  # type: ignore[index]

    def test_from_documents(self, embedding_openai: Embeddings) -> None:
        """Test end to end construction and search."""
        contents_and_metadata = [
            ("Dogs are tough.", {"a": 1}),
            ("Cats have fluff.", {"b": 1}),
            ("What is a sandwich?", {"c": 1}),
            ("That fence is purple.", {"d": 1, "e": 2}),
        ]
        docs = [
            Document(page_content=text, metadata=meta)
            for text, meta in contents_and_metadata
        ]
        store = MongoDBAtlasVectorSearch.from_documents(
            docs,
            embedding_openai,
            collection=collection,
            index_name=INDEX_NAME,
        )
        # Give mongot a moment to update Lucene's index before querying.
        sleep(1)
        hits = store.similarity_search("Sandwich", k=1)
        best = hits[0]
        assert best.page_content == "What is a sandwich?"
        assert best.metadata["c"] == 1

    def test_from_texts(self, embedding_openai: Embeddings) -> None:
        """Build the store from raw strings and check the top hit's text."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "That fence is purple.",
        ]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            collection=collection,
            index_name=INDEX_NAME,
        )
        # Give mongot a moment to update Lucene's index before querying.
        sleep(1)
        hits = store.similarity_search("Sandwich", k=1)
        best = hits[0]
        assert best.page_content == "What is a sandwich?"

    def test_from_texts_with_metadatas(self, embedding_openai: Embeddings) -> None:
        """Build the store from strings plus metadata and check both on the top hit."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "The fence is purple.",
        ]
        # One metadata dict per text, in the same order as ``corpus``.
        metadata_per_text = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            metadatas=metadata_per_text,
            collection=collection,
            index_name=INDEX_NAME,
        )
        # Give mongot a moment to update Lucene's index before querying.
        sleep(1)
        hits = store.similarity_search("Sandwich", k=1)
        best = hits[0]
        assert best.page_content == "What is a sandwich?"
        assert best.metadata["c"] == 1

    def test_from_texts_with_metadatas_and_pre_filter(
        self, embedding_openai: Embeddings
    ) -> None:
        """Check that a pre-filter no stored document satisfies yields no results."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "The fence is purple.",
        ]
        # One metadata dict per text, in the same order as ``corpus``.
        metadata_per_text = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            metadatas=metadata_per_text,
            collection=collection,
            index_name=INDEX_NAME,
        )
        # Give mongot a moment to update Lucene's index before querying.
        sleep(1)
        # The only inserted document carrying "c" stores 1, so a "lte 0" range on
        # that path is expected to exclude everything.
        excluding_filter = {"range": {"lte": 0, "path": "c"}}
        hits = store.similarity_search("Sandwich", k=1, pre_filter=excluding_filter)
        assert hits == []
        assert output == []
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00			`"""Test MongoDB Atlas Vector Search functionality."""`
			`from __future__ import annotations`

			`import os`
			`from time import sleep`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`from typing import TYPE_CHECKING`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00
			`import pytest`

			`from langchain.docstore.document import Document`
			`from langchain.embeddings.base import Embeddings`
			`from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch`

			`if TYPE_CHECKING:`
			`from pymongo import MongoClient`

			`INDEX_NAME = "langchain-test-index"`
			`NAMESPACE = "langchain_test_db.langchain_test_collection"`
			`CONNECTION_STRING = os.environ.get("MONGODB_ATLAS_URI")`
			`DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")`

			`# Instantiate as constant instead of pytest fixture to prevent needing to make multiple`
			`# connections.`
Harrison/myscale self query (#6376) Co-authored-by: Fangrui Liu <fangruil@moqi.ai> Co-authored-by: 刘方瑞 <fangrui.liu@outlook.com> Co-authored-by: Fangrui.Liu <fangrui.liu@ubc.ca> 2023-06-18 23:53:10 +00:00			`TEST_CLIENT: MongoClient = MongoClient(CONNECTION_STRING)`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00

			`class TestMongoDBAtlasVectorSearch:`
			`@classmethod`
			`def setup_class(cls) -> None:`
			`# insure the test collection is empty`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`assert collection.count_documents({}) == 0 # type: ignore[index] # noqa: E501`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00
			`@classmethod`
			`def teardown_class(cls) -> None:`
			`# delete all the documents in the collection`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection.delete_many({}) # type: ignore[index]`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00
			`@pytest.fixture(autouse=True)`
			`def setup(self) -> None:`
			`# delete all the documents in the collection`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection.delete_many({}) # type: ignore[index]`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00
			`def test_from_documents(self, embedding_openai: Embeddings) -> None:`
			`"""Test end to end construction and search."""`
			`documents = [`
			`Document(page_content="Dogs are tough.", metadata={"a": 1}),`
			`Document(page_content="Cats have fluff.", metadata={"b": 1}),`
			`Document(page_content="What is a sandwich?", metadata={"c": 1}),`
			`Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),`
			`]`
			`vectorstore = MongoDBAtlasVectorSearch.from_documents(`
			`documents,`
			`embedding_openai,`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection=collection,`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00			`index_name=INDEX_NAME,`
			`)`
			`sleep(1) # waits for mongot to update Lucene's index`
			`output = vectorstore.similarity_search("Sandwich", k=1)`
			`assert output[0].page_content == "What is a sandwich?"`
			`assert output[0].metadata["c"] == 1`

			`def test_from_texts(self, embedding_openai: Embeddings) -> None:`
			`texts = [`
			`"Dogs are tough.",`
			`"Cats have fluff.",`
			`"What is a sandwich?",`
			`"That fence is purple.",`
			`]`
			`vectorstore = MongoDBAtlasVectorSearch.from_texts(`
			`texts,`
			`embedding_openai,`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection=collection,`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00			`index_name=INDEX_NAME,`
			`)`
			`sleep(1) # waits for mongot to update Lucene's index`
			`output = vectorstore.similarity_search("Sandwich", k=1)`
			`assert output[0].page_content == "What is a sandwich?"`

			`def test_from_texts_with_metadatas(self, embedding_openai: Embeddings) -> None:`
			`texts = [`
			`"Dogs are tough.",`
			`"Cats have fluff.",`
			`"What is a sandwich?",`
			`"The fence is purple.",`
			`]`
			`metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]`
			`vectorstore = MongoDBAtlasVectorSearch.from_texts(`
			`texts,`
			`embedding_openai,`
			`metadatas=metadatas,`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection=collection,`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00			`index_name=INDEX_NAME,`
			`)`
			`sleep(1) # waits for mongot to update Lucene's index`
			`output = vectorstore.similarity_search("Sandwich", k=1)`
			`assert output[0].page_content == "What is a sandwich?"`
			`assert output[0].metadata["c"] == 1`

			`def test_from_texts_with_metadatas_and_pre_filter(`
			`self, embedding_openai: Embeddings`
			`) -> None:`
			`texts = [`
			`"Dogs are tough.",`
			`"Cats have fluff.",`
			`"What is a sandwich?",`
			`"The fence is purple.",`
			`]`
			`metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]`
			`vectorstore = MongoDBAtlasVectorSearch.from_texts(`
			`texts,`
			`embedding_openai,`
			`metadatas=metadatas,`
removing client+namespace in favor of collection (#5610) removing client+namespace in favor of collection for an easier instantiation and to be similar to the typescript library @dev2049 2023-06-03 23:27:31 +00:00			`collection=collection,`
adding MongoDBAtlasVectorSearch (#5338) # Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-30 14:59:01 +00:00			`index_name=INDEX_NAME,`
			`)`
			`sleep(1) # waits for mongot to update Lucene's index`
			`output = vectorstore.similarity_search(`
			`"Sandwich", k=1, pre_filter={"range": {"lte": 0, "path": "c"}}`
			`)`
			`assert output == []`