diff --git a/libs/text-splitters/langchain_text_splitters/json.py b/libs/text-splitters/langchain_text_splitters/json.py index 7f21d5b9f6..c83d8b2a42 100644 --- a/libs/text-splitters/langchain_text_splitters/json.py +++ b/libs/text-splitters/langchain_text_splitters/json.py @@ -55,7 +55,7 @@ class RecursiveJsonSplitter: Split json into maximum size dictionaries while preserving structure. """ current_path = current_path or [] - chunks = chunks or [{}] + chunks = chunks if chunks is not None else [{}] if isinstance(data, dict): for key, value in data.items(): new_path = current_path + [key] diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py index d1d113009d..2229480522 100644 --- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py +++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py @@ -1953,3 +1953,24 @@ def test_split_json_with_lists() -> None: texts_list = splitter.split_text(json_data=test_data_list, convert_lists=True) assert len(texts_list) >= len(texts) + + +def test_split_json_many_calls() -> None: + x = {"a": 1, "b": 2} + y = {"c": 3, "d": 4} + + splitter = RecursiveJsonSplitter() + chunk0 = splitter.split_json(x) + assert chunk0 == [{"a": 1, "b": 2}] + + chunk1 = splitter.split_json(y) + assert chunk1 == [{"c": 3, "d": 4}] + + # Regression check: chunk0 must NOT be altered by creating chunk1 + assert chunk0 == [{"a": 1, "b": 2}] + + chunk0_output = [{"a": 1, "b": 2}] + chunk1_output = [{"c": 3, "d": 4}] + + assert chunk0 == chunk0_output + assert chunk1 == chunk1_output