|
|
|
@ -736,6 +736,70 @@ def test_incremental_delete(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_incremental_indexing_with_batch_size(
    record_manager: SQLRecordManager, vector_store: InMemoryVectorStore
) -> None:
    """Test incremental indexing when documents are processed in batches.

    Four documents that share the source id "1" are indexed twice with
    ``batch_size=2``. The first run is expected to report four additions;
    the second run is expected to report four skips and no deletions.
    Afterwards the vector store must hold exactly the four page contents.
    """
    page_contents = ("1", "2", "3", "4")
    loader = ToyLoader(
        documents=[
            Document(page_content=text, metadata={"source": "1"})
            for text in page_contents
        ]
    )

    # Pin the record manager's clock so both runs see the same timestamp.
    frozen_time = datetime(2021, 1, 2).timestamp()

    # NOTE(review): the second run and the store check are assumed to belong
    # inside the patched-clock block -- confirm against the original layout.
    with patch.object(record_manager, "get_time", return_value=frozen_time):
        first_run = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=2,
        )
        assert first_run == {
            "num_added": 4,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }

        # Re-indexing identical content: nothing added, nothing removed.
        second_run = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=2,
        )
        assert second_run == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 4,
            "num_updated": 0,
        }

        doc_texts = {
            # Ignoring type since doc should be in the store and not a None
            vector_store.store.get(uid).page_content  # type: ignore
            for uid in vector_store.store
        }
        assert doc_texts == {"1", "2", "3", "4"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_incremental_delete_with_batch_size(
|
|
|
|
|
record_manager: SQLRecordManager, vector_store: InMemoryVectorStore
|
|
|
|
|
) -> None:
|
|
|
|
|