From b65a1d4cfdcdf48edd80d5ccf4a85370c02c243b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Fri, 12 Apr 2024 10:19:18 -0400 Subject: [PATCH] langchain[patch]: Add another unit test for indexing code (#20387) Add another unit test for indexing --- .../tests/unit_tests/indexes/test_indexing.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/libs/langchain/tests/unit_tests/indexes/test_indexing.py b/libs/langchain/tests/unit_tests/indexes/test_indexing.py index 10275db943..5826687f28 100644 --- a/libs/langchain/tests/unit_tests/indexes/test_indexing.py +++ b/libs/langchain/tests/unit_tests/indexes/test_indexing.py @@ -736,6 +736,70 @@ def test_incremental_delete( } +def test_incremental_indexing_with_batch_size( + record_manager: SQLRecordManager, vector_store: InMemoryVectorStore +) -> None: + """Check incremental indexing with batch_size=2: adds 4 docs, then skips all 4.""" + loader = ToyLoader( + documents=[ + Document( + page_content="1", + metadata={"source": "1"}, + ), + Document( + page_content="2", + metadata={"source": "1"}, + ), + Document( + page_content="3", + metadata={"source": "1"}, + ), + Document( + page_content="4", + metadata={"source": "1"}, + ), + ] + ) + + with patch.object( + record_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp() + ): + assert index( + loader, + record_manager, + vector_store, + cleanup="incremental", + source_id_key="source", + batch_size=2, + ) == { + "num_added": 4, + "num_deleted": 0, + "num_skipped": 0, + "num_updated": 0, + } + + assert index( + loader, + record_manager, + vector_store, + cleanup="incremental", + source_id_key="source", + batch_size=2, + ) == { + "num_added": 0, + "num_deleted": 0, + "num_skipped": 4, + "num_updated": 0, + } + + doc_texts = set( + # Ignoring type since doc should be in the store and not a None + vector_store.store.get(uid).page_content # type: ignore + for uid in vector_store.store + ) + assert doc_texts == {"1", "2", "3", "4"} + + def test_incremental_delete_with_batch_size( 
record_manager: SQLRecordManager, vector_store: InMemoryVectorStore ) -> None: