`chat_loaders` refactoring (#10381)

Replaced the unnecessary namespace alias
`from langchain.chat_loaders import base as chat_loaders`
with the direct import
`from langchain.chat_loaders.base import BaseChatLoader, ChatSession`
and simplified the corresponding type references.

@eyurtsev
pull/10414/head
Leonid Ganeline 12 months ago committed by GitHub
parent 40d9191955
commit 90504fc499
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -4,13 +4,13 @@ from pathlib import Path
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
if TYPE_CHECKING:
import sqlite3
class IMessageChatLoader(chat_loaders.BaseChatLoader):
class IMessageChatLoader(BaseChatLoader):
"""Load chat sessions from the `iMessage` chat.db SQLite file.
It only works on macOS when you have iMessage enabled and have the chat.db file.
@ -18,8 +18,8 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
terminal may not have permission to access this file. To resolve this, you can
copy the file to a different location, change the permissions of the file, or
grant full disk access for your terminal emulator in System Settings > Security
and Privacy > Full Disk Access.
grant full disk access for your terminal emulator
in System Settings > Security and Privacy > Full Disk Access.
"""
def __init__(self, path: Optional[Union[str, Path]] = None):
@ -46,7 +46,7 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
def _load_single_chat_session(
self, cursor: "sqlite3.Cursor", chat_id: int
) -> chat_loaders.ChatSession:
) -> ChatSession:
"""
Load a single chat session from the iMessage chat.db.
@ -83,9 +83,9 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
)
)
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""
Lazy load the chat sessions from the iMessage chat.db
and yield them in the required format.

@ -6,12 +6,12 @@ from pathlib import Path
from typing import Dict, Iterator, List, Union
from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
logger = logging.getLogger(__name__)
class SlackChatLoader(chat_loaders.BaseChatLoader):
class SlackChatLoader(BaseChatLoader):
"""Load `Slack` conversations from a dump zip file."""
def __init__(
@ -27,9 +27,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
if not self.zip_path.exists():
raise FileNotFoundError(f"File {self.zip_path} not found")
def _load_single_chat_session(
self, messages: List[Dict]
) -> chat_loaders.ChatSession:
def _load_single_chat_session(self, messages: List[Dict]) -> ChatSession:
results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
previous_sender = None
for message in messages:
@ -62,7 +60,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
)
)
previous_sender = sender
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
"""Read JSON data from a zip subfile."""
@ -72,7 +70,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
raise ValueError(f"Expected list of dictionaries, got {type(data)}")
return data
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""
Lazy load the chat sessions from the Slack dump file and yield them
in the required format.

@ -7,12 +7,12 @@ from pathlib import Path
from typing import Iterator, List, Union
from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
logger = logging.getLogger(__name__)
class TelegramChatLoader(chat_loaders.BaseChatLoader):
class TelegramChatLoader(BaseChatLoader):
"""Load `telegram` conversations to LangChain chat messages.
To export, use the Telegram Desktop app from
@ -35,16 +35,14 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
"""
self.path = path if isinstance(path, str) else str(path)
def _load_single_chat_session_html(
self, file_path: str
) -> chat_loaders.ChatSession:
def _load_single_chat_session_html(self, file_path: str) -> ChatSession:
"""Load a single chat session from an HTML file.
Args:
file_path (str): Path to the HTML file.
Returns:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
try:
from bs4 import BeautifulSoup
@ -81,18 +79,16 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
)
previous_sender = from_name
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)
def _load_single_chat_session_json(
self, file_path: str
) -> chat_loaders.ChatSession:
def _load_single_chat_session_json(self, file_path: str) -> ChatSession:
"""Load a single chat session from a JSON file.
Args:
file_path (str): Path to the JSON file.
Returns:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
with open(file_path, "r", encoding="utf-8") as file:
data = json.load(file)
@ -114,7 +110,7 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
)
)
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)
def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over files in a directory or zip file.
@ -139,12 +135,12 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
with tempfile.TemporaryDirectory() as temp_dir:
yield zip_file.extract(file, path=temp_dir)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield them
in as chat sessions.
Yields:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
for file_path in self._iterate_files(self.path):
if file_path.endswith(".html"):

@ -5,13 +5,13 @@ import zipfile
from typing import Iterator, List, Union
from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
from langchain.schema import messages
logger = logging.getLogger(__name__)
class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
class WhatsAppChatLoader(BaseChatLoader):
"""Load `WhatsApp` conversations from a dump zip file or directory."""
def __init__(self, path: str):
@ -42,7 +42,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
flags=re.IGNORECASE,
)
def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession:
def _load_single_chat_session(self, file_path: str) -> ChatSession:
"""Load a single chat session from a file.
Args:
@ -84,7 +84,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
)
else:
logger.debug(f"Could not parse line: {line}")
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)
def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over the files in a directory or zip file.
@ -108,7 +108,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
if file.endswith(".txt"):
yield zip_file.extract(file)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield
them as chat sessions.

Loading…
Cancel
Save