You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
langchain/libs/community/tests/unit_tests/chat_loaders/test_telegram.py

98 lines
3.4 KiB
Python

"""Test the telegram chat loader."""
import pathlib
import tempfile
import zipfile
from typing import Sequence
import pytest
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
10 months ago
from langchain_community.chat_loaders import telegram, utils
def _assert_messages_are_equal(
    actual_messages: Sequence[BaseMessage],
    expected_messages: Sequence[BaseMessage],
) -> None:
    """Assert that two message sequences match position by position.

    Both sequences must have the same length. For every pair of messages at
    the same index, ``content`` and ``additional_kwargs["sender"]`` must be
    equal. No other attribute of the messages is compared.
    """
    assert len(actual_messages) == len(expected_messages)
    for got, want in zip(actual_messages, expected_messages):
        assert got.content == want.content
        # Only the sender entry of the extra kwargs is checked.
        got_sender = got.additional_kwargs["sender"]
        want_sender = want.additional_kwargs["sender"]
        assert got_sender == want_sender
def _check_telegram_chat_loader(path: str) -> None:
    """Load a Telegram export fixture and verify the resulting chat session.

    ``path`` is resolved relative to the ``data`` directory that sits next to
    this test module. When ``path`` ends with ``.zip``, an archive named
    ``telegram_chat.zip`` is built in a temporary directory from the files of
    the fixture directory obtained by removing ``.zip`` from ``path``, and the
    loader is pointed at that archive instead.

    The loaded sessions are passed through ``utils.merge_chat_runs`` and
    ``utils.map_ai_messages`` (with sender ``"Batman & Robin"``). Exactly one
    session is expected, and its messages are compared against the expected
    conversation with ``_assert_messages_are_equal``.
    """
    data_root = pathlib.Path(__file__).parent / "data"
    load_target = data_root / path

    with tempfile.TemporaryDirectory() as scratch_name:
        if path.endswith(".zip"):
            # Pack the files of the fixture directory into a fresh archive
            # inside the temporary directory.
            fixture_dir = data_root / path.replace(".zip", "")
            archive_path = pathlib.Path(scratch_name) / "telegram_chat.zip"
            with zipfile.ZipFile(archive_path, "w") as archive:
                for member in fixture_dir.iterdir():
                    archive.write(member, arcname=member.name)
            load_target = archive_path

        # The pipeline starts from ``lazy_load()``, so the sessions are
        # materialized into a list while the temporary directory still exists.
        raw_sessions = telegram.TelegramChatLoader(str(load_target)).lazy_load()
        merged_sessions = utils.merge_chat_runs(raw_sessions)
        chat_sessions = list(
            utils.map_ai_messages(merged_sessions, sender="Batman & Robin")
        )

    assert len(chat_sessions) == 1
    only_session = chat_sessions[0]
    assert len(only_session["messages"]) > 0
    assert only_session["messages"][0].content == "i refuse to converse with you"

    human_sender = "Jimmeny Marvelton"
    ai_sender = "Batman & Robin"
    opening = HumanMessage(
        content="i refuse to converse with you",
        additional_kwargs={
            "sender": human_sender,
            "events": [{"message_time": "23.08.2023 13:11:23 UTC-08:00"}],
        },
    )
    reply = AIMessage(
        content="Hi nemesis",
        additional_kwargs={
            "sender": ai_sender,
            "events": [{"message_time": "23.08.2023 13:13:20 UTC-08:00"}],
        },
    )
    follow_up = HumanMessage(
        content="we meet again\n\nyou will not trick me this time",
        additional_kwargs={
            "sender": human_sender,
            "events": [{"message_time": "23.08.2023 13:15:35 UTC-08:00"}],
        },
    )
    _assert_messages_are_equal(
        only_session["messages"], [opening, reply, follow_up]
    )
@pytest.mark.parametrize(
    "path",
    ["telegram_chat_json", "telegram_chat_json.zip", "telegram_chat_json/result.json"],
)
def test_telegram_chat_loader(path: str) -> None:
    """Run the shared loader check for each supported fixture form.

    The parameters cover the export directory, a ``.zip`` variant of it, and
    the ``result.json`` file inside the directory.
    """
    _check_telegram_chat_loader(path)
@pytest.mark.skip(reason="requires bs4 but marking it as such doesn't seem to work")
@pytest.mark.parametrize(
    "path",
    ["telegram_chat_json", "telegram_chat_json.zip", "telegram_chat_json/result.json"],
)
def test_telegram_chat_loader_html(path: str) -> None:
    """Run the shared loader check; currently skipped unconditionally.

    NOTE(review): the parameter list and the body are the same as in
    ``test_telegram_chat_loader``, so this exercises the JSON fixtures rather
    than HTML ones. Confirm whether HTML fixtures were intended here.
    """
    _check_telegram_chat_loader(path)