You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/tests/unit_tests/chat_loaders/test_telegram.py

98 lines
3.4 KiB
Python

"""Test the telegram chat loader."""
import pathlib
import tempfile
import zipfile
from typing import Sequence
import pytest
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_community.chat_loaders import telegram, utils
def _assert_messages_are_equal(
    actual_messages: Sequence[BaseMessage],
    expected_messages: Sequence[BaseMessage],
) -> None:
    """Assert that two message sequences match pairwise.

    Only the number of messages, each message's ``content`` and the
    ``"sender"`` entry of its ``additional_kwargs`` are compared; every other
    field (for example ``"events"``) is ignored.

    Raises:
        AssertionError: If the lengths differ or any compared field differs.
    """
    # Check the lengths first: ``zip`` would otherwise silently drop the
    # unmatched tail of the longer sequence.
    assert len(actual_messages) == len(expected_messages)
    for got, want in zip(actual_messages, expected_messages):
        assert got.content == want.content
        got_sender = got.additional_kwargs["sender"]
        want_sender = want.additional_kwargs["sender"]
        assert got_sender == want_sender
def _check_telegram_chat_loader(path: str) -> None:
    """Load a Telegram export fixture and verify the resulting chat session.

    ``path`` is resolved relative to the ``data`` directory next to this test
    module. If it ends with ``.zip``, an archive named ``telegram_chat.zip`` is
    built in a temporary directory from the files of the directory obtained by
    dropping ``.zip`` from ``path``, and that archive is loaded instead.

    The loaded sessions are passed through ``utils.merge_chat_runs`` and
    ``utils.map_ai_messages`` (with ``"Batman & Robin"`` as the AI sender)
    and must yield exactly one session with the expected three messages.
    """
    data_root = pathlib.Path(__file__).parent / "data"

    expected_messages = [
        HumanMessage(
            content="i refuse to converse with you",
            additional_kwargs={
                "sender": "Jimmeny Marvelton",
                "events": [{"message_time": "23.08.2023 13:11:23 UTC-08:00"}],
            },
        ),
        AIMessage(
            content="Hi nemesis",
            additional_kwargs={
                "sender": "Batman & Robin",
                "events": [{"message_time": "23.08.2023 13:13:20 UTC-08:00"}],
            },
        ),
        HumanMessage(
            content="we meet again\n\nyou will not trick me this time",
            additional_kwargs={
                "sender": "Jimmeny Marvelton",
                "events": [{"message_time": "23.08.2023 13:15:35 UTC-08:00"}],
            },
        ),
    ]

    # The loader is lazy, so everything that consumes it stays inside the
    # temporary-directory context while the generated archive still exists.
    with tempfile.TemporaryDirectory() as scratch_dir:
        if path.endswith(".zip"):
            # Build the archive on the fly from the unpacked fixture directory.
            load_target = pathlib.Path(scratch_dir) / "telegram_chat.zip"
            with zipfile.ZipFile(load_target, "w") as archive:
                fixture_dir = data_root / path.replace(".zip", "")
                for member in fixture_dir.iterdir():
                    archive.write(member, arcname=member.name)
        else:
            load_target = data_root / path

        raw_sessions = telegram.TelegramChatLoader(str(load_target)).lazy_load()
        merged_sessions = utils.merge_chat_runs(raw_sessions)
        sessions = list(
            utils.map_ai_messages(merged_sessions, sender="Batman & Robin")
        )

        assert len(sessions) == 1
        messages = sessions[0]["messages"]
        assert len(messages) > 0
        assert messages[0].content == "i refuse to converse with you"
        _assert_messages_are_equal(messages, expected_messages)
@pytest.mark.parametrize(
    "path",
    (
        # Export directory, zipped export, and the bare JSON file.
        "telegram_chat_json",
        "telegram_chat_json.zip",
        "telegram_chat_json/result.json",
    ),
)
def test_telegram_chat_loader(path: str) -> None:
    """Run the shared loader check against each JSON fixture form."""
    _check_telegram_chat_loader(path)
# Skipped unconditionally; see the skip reason below.
# NOTE(review): the parametrized paths are the same JSON fixtures used by
# ``test_telegram_chat_loader`` -- no HTML fixture is referenced here. Confirm
# the intended HTML paths before re-enabling this test.
@pytest.mark.skip(reason="requires bs4 but marking it as such doesn't seem to work")
@pytest.mark.parametrize(
    "path",
    (
        "telegram_chat_json",
        "telegram_chat_json.zip",
        "telegram_chat_json/result.json",
    ),
)
def test_telegram_chat_loader_html(path: str) -> None:
    """Run the shared loader check for the (currently skipped) HTML variant."""
    _check_telegram_chat_loader(path)