chat_loaders refactoring (#10381)

Replaced the unnecessary namespace alias import
`from langchain.chat_loaders import base as chat_loaders`
with
`from langchain.chat_loaders.base import BaseChatLoader, ChatSession`
and simplified the corresponding type references.

@eyurtsev
This commit is contained in:
Leonid Ganeline 2023-09-09 15:22:56 -07:00 committed by GitHub
parent 40d9191955
commit 90504fc499
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 27 additions and 33 deletions

View File

@ -4,13 +4,13 @@ from pathlib import Path
from typing import TYPE_CHECKING, Iterator, List, Optional, Union from typing import TYPE_CHECKING, Iterator, List, Optional, Union
from langchain import schema from langchain import schema
from langchain.chat_loaders import base as chat_loaders from langchain.chat_loaders.base import BaseChatLoader, ChatSession
if TYPE_CHECKING: if TYPE_CHECKING:
import sqlite3 import sqlite3
class IMessageChatLoader(chat_loaders.BaseChatLoader): class IMessageChatLoader(BaseChatLoader):
"""Load chat sessions from the `iMessage` chat.db SQLite file. """Load chat sessions from the `iMessage` chat.db SQLite file.
It only works on macOS when you have iMessage enabled and have the chat.db file. It only works on macOS when you have iMessage enabled and have the chat.db file.
@ -18,8 +18,8 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
terminal may not have permission to access this file. To resolve this, you can terminal may not have permission to access this file. To resolve this, you can
copy the file to a different location, change the permissions of the file, or copy the file to a different location, change the permissions of the file, or
grant full disk access for your terminal emulator in System Settings > Security grant full disk access for your terminal emulator
and Privacy > Full Disk Access. in System Settings > Security and Privacy > Full Disk Access.
""" """
def __init__(self, path: Optional[Union[str, Path]] = None): def __init__(self, path: Optional[Union[str, Path]] = None):
@ -46,7 +46,7 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
def _load_single_chat_session( def _load_single_chat_session(
self, cursor: "sqlite3.Cursor", chat_id: int self, cursor: "sqlite3.Cursor", chat_id: int
) -> chat_loaders.ChatSession: ) -> ChatSession:
""" """
Load a single chat session from the iMessage chat.db. Load a single chat session from the iMessage chat.db.
@ -83,9 +83,9 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
) )
) )
return chat_loaders.ChatSession(messages=results) return ChatSession(messages=results)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]: def lazy_load(self) -> Iterator[ChatSession]:
""" """
Lazy load the chat sessions from the iMessage chat.db Lazy load the chat sessions from the iMessage chat.db
and yield them in the required format. and yield them in the required format.

View File

@ -6,12 +6,12 @@ from pathlib import Path
from typing import Dict, Iterator, List, Union from typing import Dict, Iterator, List, Union
from langchain import schema from langchain import schema
from langchain.chat_loaders import base as chat_loaders from langchain.chat_loaders.base import BaseChatLoader, ChatSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class SlackChatLoader(chat_loaders.BaseChatLoader): class SlackChatLoader(BaseChatLoader):
"""Load `Slack` conversations from a dump zip file.""" """Load `Slack` conversations from a dump zip file."""
def __init__( def __init__(
@ -27,9 +27,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
if not self.zip_path.exists(): if not self.zip_path.exists():
raise FileNotFoundError(f"File {self.zip_path} not found") raise FileNotFoundError(f"File {self.zip_path} not found")
def _load_single_chat_session( def _load_single_chat_session(self, messages: List[Dict]) -> ChatSession:
self, messages: List[Dict]
) -> chat_loaders.ChatSession:
results: List[Union[schema.AIMessage, schema.HumanMessage]] = [] results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
previous_sender = None previous_sender = None
for message in messages: for message in messages:
@ -62,7 +60,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
) )
) )
previous_sender = sender previous_sender = sender
return chat_loaders.ChatSession(messages=results) return ChatSession(messages=results)
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]: def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
"""Read JSON data from a zip subfile.""" """Read JSON data from a zip subfile."""
@ -72,7 +70,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
raise ValueError(f"Expected list of dictionaries, got {type(data)}") raise ValueError(f"Expected list of dictionaries, got {type(data)}")
return data return data
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]: def lazy_load(self) -> Iterator[ChatSession]:
""" """
Lazy load the chat sessions from the Slack dump file and yield them Lazy load the chat sessions from the Slack dump file and yield them
in the required format. in the required format.

View File

@ -7,12 +7,12 @@ from pathlib import Path
from typing import Iterator, List, Union from typing import Iterator, List, Union
from langchain import schema from langchain import schema
from langchain.chat_loaders import base as chat_loaders from langchain.chat_loaders.base import BaseChatLoader, ChatSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class TelegramChatLoader(chat_loaders.BaseChatLoader): class TelegramChatLoader(BaseChatLoader):
"""Load `telegram` conversations to LangChain chat messages. """Load `telegram` conversations to LangChain chat messages.
To export, use the Telegram Desktop app from To export, use the Telegram Desktop app from
@ -35,16 +35,14 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
""" """
self.path = path if isinstance(path, str) else str(path) self.path = path if isinstance(path, str) else str(path)
def _load_single_chat_session_html( def _load_single_chat_session_html(self, file_path: str) -> ChatSession:
self, file_path: str
) -> chat_loaders.ChatSession:
"""Load a single chat session from an HTML file. """Load a single chat session from an HTML file.
Args: Args:
file_path (str): Path to the HTML file. file_path (str): Path to the HTML file.
Returns: Returns:
chat_loaders.ChatSession: The loaded chat session. ChatSession: The loaded chat session.
""" """
try: try:
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -81,18 +79,16 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
) )
previous_sender = from_name previous_sender = from_name
return chat_loaders.ChatSession(messages=results) return ChatSession(messages=results)
def _load_single_chat_session_json( def _load_single_chat_session_json(self, file_path: str) -> ChatSession:
self, file_path: str
) -> chat_loaders.ChatSession:
"""Load a single chat session from a JSON file. """Load a single chat session from a JSON file.
Args: Args:
file_path (str): Path to the JSON file. file_path (str): Path to the JSON file.
Returns: Returns:
chat_loaders.ChatSession: The loaded chat session. ChatSession: The loaded chat session.
""" """
with open(file_path, "r", encoding="utf-8") as file: with open(file_path, "r", encoding="utf-8") as file:
data = json.load(file) data = json.load(file)
@ -114,7 +110,7 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
) )
) )
return chat_loaders.ChatSession(messages=results) return ChatSession(messages=results)
def _iterate_files(self, path: str) -> Iterator[str]: def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over files in a directory or zip file. """Iterate over files in a directory or zip file.
@ -139,12 +135,12 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
yield zip_file.extract(file, path=temp_dir) yield zip_file.extract(file, path=temp_dir)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]: def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield them """Lazy load the messages from the chat file and yield them
in as chat sessions. in as chat sessions.
Yields: Yields:
chat_loaders.ChatSession: The loaded chat session. ChatSession: The loaded chat session.
""" """
for file_path in self._iterate_files(self.path): for file_path in self._iterate_files(self.path):
if file_path.endswith(".html"): if file_path.endswith(".html"):

View File

@ -5,13 +5,13 @@ import zipfile
from typing import Iterator, List, Union from typing import Iterator, List, Union
from langchain import schema from langchain import schema
from langchain.chat_loaders import base as chat_loaders from langchain.chat_loaders.base import BaseChatLoader, ChatSession
from langchain.schema import messages from langchain.schema import messages
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class WhatsAppChatLoader(chat_loaders.BaseChatLoader): class WhatsAppChatLoader(BaseChatLoader):
"""Load `WhatsApp` conversations from a dump zip file or directory.""" """Load `WhatsApp` conversations from a dump zip file or directory."""
def __init__(self, path: str): def __init__(self, path: str):
@ -42,7 +42,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
flags=re.IGNORECASE, flags=re.IGNORECASE,
) )
def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession: def _load_single_chat_session(self, file_path: str) -> ChatSession:
"""Load a single chat session from a file. """Load a single chat session from a file.
Args: Args:
@ -84,7 +84,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
) )
else: else:
logger.debug(f"Could not parse line: {line}") logger.debug(f"Could not parse line: {line}")
return chat_loaders.ChatSession(messages=results) return ChatSession(messages=results)
def _iterate_files(self, path: str) -> Iterator[str]: def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over the files in a directory or zip file. """Iterate over the files in a directory or zip file.
@ -108,7 +108,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
if file.endswith(".txt"): if file.endswith(".txt"):
yield zip_file.extract(file) yield zip_file.extract(file)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]: def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield """Lazy load the messages from the chat file and yield
them as chat sessions. them as chat sessions.