mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
chat_loaders refactoring (#10381)
Replaced the unnecessary namespace alias `from langchain.chat_loaders import base as chat_loaders` with the direct import `from langchain.chat_loaders.base import BaseChatLoader, ChatSession` and simplified the corresponding type annotations. @eyurtsev
This commit is contained in:
parent
40d9191955
commit
90504fc499
@ -4,13 +4,13 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
||||||
|
|
||||||
from langchain import schema
|
from langchain import schema
|
||||||
from langchain.chat_loaders import base as chat_loaders
|
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
class IMessageChatLoader(BaseChatLoader):
|
||||||
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
||||||
|
|
||||||
It only works on macOS when you have iMessage enabled and have the chat.db file.
|
It only works on macOS when you have iMessage enabled and have the chat.db file.
|
||||||
@ -18,8 +18,8 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
|
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
|
||||||
terminal may not have permission to access this file. To resolve this, you can
|
terminal may not have permission to access this file. To resolve this, you can
|
||||||
copy the file to a different location, change the permissions of the file, or
|
copy the file to a different location, change the permissions of the file, or
|
||||||
grant full disk access for your terminal emulator in System Settings > Security
|
grant full disk access for your terminal emulator
|
||||||
and Privacy > Full Disk Access.
|
in System Settings > Security and Privacy > Full Disk Access.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, path: Optional[Union[str, Path]] = None):
|
def __init__(self, path: Optional[Union[str, Path]] = None):
|
||||||
@ -46,7 +46,7 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
|
|
||||||
def _load_single_chat_session(
|
def _load_single_chat_session(
|
||||||
self, cursor: "sqlite3.Cursor", chat_id: int
|
self, cursor: "sqlite3.Cursor", chat_id: int
|
||||||
) -> chat_loaders.ChatSession:
|
) -> ChatSession:
|
||||||
"""
|
"""
|
||||||
Load a single chat session from the iMessage chat.db.
|
Load a single chat session from the iMessage chat.db.
|
||||||
|
|
||||||
@ -83,9 +83,9 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return chat_loaders.ChatSession(messages=results)
|
return ChatSession(messages=results)
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
def lazy_load(self) -> Iterator[ChatSession]:
|
||||||
"""
|
"""
|
||||||
Lazy load the chat sessions from the iMessage chat.db
|
Lazy load the chat sessions from the iMessage chat.db
|
||||||
and yield them in the required format.
|
and yield them in the required format.
|
||||||
|
@ -6,12 +6,12 @@ from pathlib import Path
|
|||||||
from typing import Dict, Iterator, List, Union
|
from typing import Dict, Iterator, List, Union
|
||||||
|
|
||||||
from langchain import schema
|
from langchain import schema
|
||||||
from langchain.chat_loaders import base as chat_loaders
|
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class SlackChatLoader(chat_loaders.BaseChatLoader):
|
class SlackChatLoader(BaseChatLoader):
|
||||||
"""Load `Slack` conversations from a dump zip file."""
|
"""Load `Slack` conversations from a dump zip file."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -27,9 +27,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
if not self.zip_path.exists():
|
if not self.zip_path.exists():
|
||||||
raise FileNotFoundError(f"File {self.zip_path} not found")
|
raise FileNotFoundError(f"File {self.zip_path} not found")
|
||||||
|
|
||||||
def _load_single_chat_session(
|
def _load_single_chat_session(self, messages: List[Dict]) -> ChatSession:
|
||||||
self, messages: List[Dict]
|
|
||||||
) -> chat_loaders.ChatSession:
|
|
||||||
results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
|
results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
|
||||||
previous_sender = None
|
previous_sender = None
|
||||||
for message in messages:
|
for message in messages:
|
||||||
@ -62,7 +60,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
previous_sender = sender
|
previous_sender = sender
|
||||||
return chat_loaders.ChatSession(messages=results)
|
return ChatSession(messages=results)
|
||||||
|
|
||||||
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
|
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
|
||||||
"""Read JSON data from a zip subfile."""
|
"""Read JSON data from a zip subfile."""
|
||||||
@ -72,7 +70,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
raise ValueError(f"Expected list of dictionaries, got {type(data)}")
|
raise ValueError(f"Expected list of dictionaries, got {type(data)}")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
def lazy_load(self) -> Iterator[ChatSession]:
|
||||||
"""
|
"""
|
||||||
Lazy load the chat sessions from the Slack dump file and yield them
|
Lazy load the chat sessions from the Slack dump file and yield them
|
||||||
in the required format.
|
in the required format.
|
||||||
|
@ -7,12 +7,12 @@ from pathlib import Path
|
|||||||
from typing import Iterator, List, Union
|
from typing import Iterator, List, Union
|
||||||
|
|
||||||
from langchain import schema
|
from langchain import schema
|
||||||
from langchain.chat_loaders import base as chat_loaders
|
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
class TelegramChatLoader(BaseChatLoader):
|
||||||
"""Load `telegram` conversations to LangChain chat messages.
|
"""Load `telegram` conversations to LangChain chat messages.
|
||||||
|
|
||||||
To export, use the Telegram Desktop app from
|
To export, use the Telegram Desktop app from
|
||||||
@ -35,16 +35,14 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
"""
|
"""
|
||||||
self.path = path if isinstance(path, str) else str(path)
|
self.path = path if isinstance(path, str) else str(path)
|
||||||
|
|
||||||
def _load_single_chat_session_html(
|
def _load_single_chat_session_html(self, file_path: str) -> ChatSession:
|
||||||
self, file_path: str
|
|
||||||
) -> chat_loaders.ChatSession:
|
|
||||||
"""Load a single chat session from an HTML file.
|
"""Load a single chat session from an HTML file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): Path to the HTML file.
|
file_path (str): Path to the HTML file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
chat_loaders.ChatSession: The loaded chat session.
|
ChatSession: The loaded chat session.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -81,18 +79,16 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
)
|
)
|
||||||
previous_sender = from_name
|
previous_sender = from_name
|
||||||
|
|
||||||
return chat_loaders.ChatSession(messages=results)
|
return ChatSession(messages=results)
|
||||||
|
|
||||||
def _load_single_chat_session_json(
|
def _load_single_chat_session_json(self, file_path: str) -> ChatSession:
|
||||||
self, file_path: str
|
|
||||||
) -> chat_loaders.ChatSession:
|
|
||||||
"""Load a single chat session from a JSON file.
|
"""Load a single chat session from a JSON file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): Path to the JSON file.
|
file_path (str): Path to the JSON file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
chat_loaders.ChatSession: The loaded chat session.
|
ChatSession: The loaded chat session.
|
||||||
"""
|
"""
|
||||||
with open(file_path, "r", encoding="utf-8") as file:
|
with open(file_path, "r", encoding="utf-8") as file:
|
||||||
data = json.load(file)
|
data = json.load(file)
|
||||||
@ -114,7 +110,7 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return chat_loaders.ChatSession(messages=results)
|
return ChatSession(messages=results)
|
||||||
|
|
||||||
def _iterate_files(self, path: str) -> Iterator[str]:
|
def _iterate_files(self, path: str) -> Iterator[str]:
|
||||||
"""Iterate over files in a directory or zip file.
|
"""Iterate over files in a directory or zip file.
|
||||||
@ -139,12 +135,12 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
yield zip_file.extract(file, path=temp_dir)
|
yield zip_file.extract(file, path=temp_dir)
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
def lazy_load(self) -> Iterator[ChatSession]:
|
||||||
"""Lazy load the messages from the chat file and yield them
|
"""Lazy load the messages from the chat file and yield them
|
||||||
in as chat sessions.
|
in as chat sessions.
|
||||||
|
|
||||||
Yields:
|
Yields:
|
||||||
chat_loaders.ChatSession: The loaded chat session.
|
ChatSession: The loaded chat session.
|
||||||
"""
|
"""
|
||||||
for file_path in self._iterate_files(self.path):
|
for file_path in self._iterate_files(self.path):
|
||||||
if file_path.endswith(".html"):
|
if file_path.endswith(".html"):
|
||||||
|
@ -5,13 +5,13 @@ import zipfile
|
|||||||
from typing import Iterator, List, Union
|
from typing import Iterator, List, Union
|
||||||
|
|
||||||
from langchain import schema
|
from langchain import schema
|
||||||
from langchain.chat_loaders import base as chat_loaders
|
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||||
from langchain.schema import messages
|
from langchain.schema import messages
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
class WhatsAppChatLoader(BaseChatLoader):
|
||||||
"""Load `WhatsApp` conversations from a dump zip file or directory."""
|
"""Load `WhatsApp` conversations from a dump zip file or directory."""
|
||||||
|
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: str):
|
||||||
@ -42,7 +42,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
flags=re.IGNORECASE,
|
flags=re.IGNORECASE,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession:
|
def _load_single_chat_session(self, file_path: str) -> ChatSession:
|
||||||
"""Load a single chat session from a file.
|
"""Load a single chat session from a file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -84,7 +84,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Could not parse line: {line}")
|
logger.debug(f"Could not parse line: {line}")
|
||||||
return chat_loaders.ChatSession(messages=results)
|
return ChatSession(messages=results)
|
||||||
|
|
||||||
def _iterate_files(self, path: str) -> Iterator[str]:
|
def _iterate_files(self, path: str) -> Iterator[str]:
|
||||||
"""Iterate over the files in a directory or zip file.
|
"""Iterate over the files in a directory or zip file.
|
||||||
@ -108,7 +108,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
|||||||
if file.endswith(".txt"):
|
if file.endswith(".txt"):
|
||||||
yield zip_file.extract(file)
|
yield zip_file.extract(file)
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
def lazy_load(self) -> Iterator[ChatSession]:
|
||||||
"""Lazy load the messages from the chat file and yield
|
"""Lazy load the messages from the chat file and yield
|
||||||
them as chat sessions.
|
them as chat sessions.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user