langchain/tests/unit_tests/document_loaders/test_json_loader.py
Raduan Al-Shedivat 00c6ec8a2d
fix(document_loaders/telegram): fix pandas calls + add tests (#4806)
# Fix Telegram API loader + add tests.
I was testing this integration and it failed with the following error:
```python
message_threads = loader._get_message_threads(df)
KeyError: False
```
Also, this particular loader didn't have any tests / related group in
poetry, so I added those as well.

@hwchase17 / @eyurtsev please take a look at this fix PR.

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
2023-05-16 14:35:25 -07:00

124 lines
3.7 KiB
Python

import pytest
from pytest import raises
from pytest_mock import MockerFixture
from langchain.docstore.document import Document
from langchain.document_loaders.json_loader import JSONLoader
@pytest.mark.requires("jq")
def test_load_valid_string_content(mocker: MockerFixture) -> None:
    """Load string values selected by ``.[].text`` with ``text_content=True``.

    ``pathlib.Path.read_text`` is patched to return a JSON array of two
    objects; the test expects one ``Document`` per selected string, with
    ``seq_num`` values 1 and 2 and the file path as ``source``.
    """
    file_path = "/workspaces/langchain/test.json"
    expected_docs = [
        Document(
            page_content=text,
            metadata={"source": file_path, "seq_num": seq_num},
        )
        for seq_num, text in enumerate(["value1", "value2"], start=1)
    ]

    # No real file is touched: both open() and Path.read_text are patched.
    mocker.patch("builtins.open", mocker.mock_open())
    mocker.patch(
        "pathlib.Path.read_text",
        return_value='[{"text": "value1"}, {"text": "value2"}]',
    )

    loader = JSONLoader(
        file_path=file_path,
        jq_schema=".[].text",
        text_content=True,
    )
    loaded_docs = loader.load()

    assert loaded_docs == expected_docs
@pytest.mark.requires("jq")
def test_load_valid_dict_content(mocker: MockerFixture) -> None:
    """Load whole JSON objects selected by ``.[]`` with ``text_content=False``.

    ``pathlib.Path.read_text`` is patched to return a JSON array of two
    objects; the test expects each object to appear as its JSON text in
    ``page_content``, with ``seq_num`` values 1 and 2.
    """
    file_path = "/workspaces/langchain/test.json"
    serialized_items = ['{"text": "value1"}', '{"text": "value2"}']
    expected_docs = [
        Document(
            page_content=serialized,
            metadata={"source": file_path, "seq_num": seq_num},
        )
        for seq_num, serialized in enumerate(serialized_items, start=1)
    ]

    # No real file is touched: both open() and Path.read_text are patched.
    mocker.patch("builtins.open", mocker.mock_open())
    mocker.patch(
        "pathlib.Path.read_text",
        return_value='\n[{"text": "value1"}, {"text": "value2"}]\n',
    )

    loader = JSONLoader(
        file_path=file_path,
        jq_schema=".[]",
        text_content=False,
    )
    loaded_docs = loader.load()

    assert loaded_docs == expected_docs
@pytest.mark.requires("jq")
def test_load_valid_bool_content(mocker: MockerFixture) -> None:
    """Load boolean values selected by ``.[].flag`` with ``text_content=False``.

    ``pathlib.Path.read_text`` is patched to return a JSON array holding a
    ``false`` and a ``true`` flag; the test expects ``page_content`` values
    of ``"False"`` and ``"True"``, with ``seq_num`` values 1 and 2.
    """
    file_path = "/workspaces/langchain/test.json"
    expected_docs = [
        Document(
            page_content=flag_text,
            metadata={"source": file_path, "seq_num": seq_num},
        )
        for seq_num, flag_text in enumerate(["False", "True"], start=1)
    ]

    # Payload spelled out line by line, newlines included.
    raw_json = (
        "\n"
        "[\n"
        '{"flag": false}, {"flag": true}\n'
        "]\n"
    )

    # No real file is touched: both open() and Path.read_text are patched.
    mocker.patch("builtins.open", mocker.mock_open())
    mocker.patch("pathlib.Path.read_text", return_value=raw_json)

    loader = JSONLoader(
        file_path=file_path,
        jq_schema=".[].flag",
        text_content=False,
    )
    loaded_docs = loader.load()

    assert loaded_docs == expected_docs
@pytest.mark.requires("jq")
def test_load_valid_numeric_content(mocker: MockerFixture) -> None:
    """Load numeric values selected by ``.[].num`` with ``text_content=False``.

    ``pathlib.Path.read_text`` is patched to return a JSON array holding an
    integer and a float; the test expects ``page_content`` values of
    ``"99"`` and ``"99.5"``, with ``seq_num`` values 1 and 2.
    """
    file_path = "/workspaces/langchain/test.json"
    expected_docs = [
        Document(
            page_content=number_text,
            metadata={"source": file_path, "seq_num": seq_num},
        )
        for seq_num, number_text in enumerate(["99", "99.5"], start=1)
    ]

    # Payload spelled out line by line, newlines included.
    raw_json = (
        "\n"
        "[\n"
        '{"num": 99}, {"num": 99.5}\n'
        "]\n"
    )

    # No real file is touched: both open() and Path.read_text are patched.
    mocker.patch("builtins.open", mocker.mock_open())
    mocker.patch("pathlib.Path.read_text", return_value=raw_json)

    loader = JSONLoader(
        file_path=file_path,
        jq_schema=".[].num",
        text_content=False,
    )
    loaded_docs = loader.load()

    assert loaded_docs == expected_docs
@pytest.mark.requires("jq")
def test_load_invalid_test_content(mocker: MockerFixture) -> None:
    """Expect ``ValueError`` when ``.[]`` selects objects but ``text_content=True``.

    ``pathlib.Path.read_text`` is patched to return a JSON array of two
    objects, so the schema yields dicts rather than strings; the test
    asserts that ``load()`` raises ``ValueError`` in that configuration.
    """
    file_path = "/workspaces/langchain/test.json"

    # No real file is touched: both open() and Path.read_text are patched.
    mocker.patch("builtins.open", mocker.mock_open())
    mocker.patch(
        "pathlib.Path.read_text",
        return_value='\n[{"text": "value1"}, {"text": "value2"}]\n',
    )

    loader = JSONLoader(
        file_path=file_path,
        jq_schema=".[]",
        text_content=True,
    )

    with pytest.raises(ValueError):
        loader.load()