diff --git a/docs/modules/indexes/document_loaders/examples/slack_directory.ipynb b/docs/modules/indexes/document_loaders/examples/slack_directory.ipynb deleted file mode 100644 index bf2a5395..00000000 --- a/docs/modules/indexes/document_loaders/examples/slack_directory.ipynb +++ /dev/null @@ -1,85 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "1dc7df1d", - "metadata": {}, - "source": [ - "# Slack\n", - "\n", - "This notebook covers how to load documents from a Slack export dumped locally.\n", - "\n", - "In order to get this Slack dump, follow these instructions:\n", - "\n", - "## 🧑 Instructions for ingesting your own dataset\n", - "\n", - "Export your Slack data. You can do this by going to your Workspace Management page and clicking the Import/Export option ({your_slack_domain}.slack.com/services/export). Then, choose the right date range and click `Start export`. Slack will send you an email and a DM when the export is ready.\n", - "\n", - "The download will produce a `.zip` file in your Downloads folder (or wherever your downloads can be found, depending on your OS configuration).\n", - "\n", - "Run the following command to unzip the zip file (replace the `Export...` with your own file name as needed) or unzip using built-in tools.\n", - "\n", - "```shell\n", - "unzip xxx.zip -d Slack_Exports\n", - "```\n", - "\n", - "Once ready, move the directory to the directory you are running this notebook from." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "007c5cbf", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import SLackDirectoryLoader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1caec59", - "metadata": {}, - "outputs": [], - "source": [ - "# Optionally set your Slack URL. This will give you proper URLs in sources which is very convernient.\n", - "SLACK_WORKSPACE_URL = \"https://xxx.slack.com\"\n", - "\n", - "loader = (\"Slack_Exports\", SLACK_WORKSPACE_URL)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1c30ff7", - "metadata": {}, - "outputs": [], - "source": [ - "docs = loader.load()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/langchain/document_loaders/slack_directory.py b/langchain/document_loaders/slack_directory.py deleted file mode 100644 index df0a1f11..00000000 --- a/langchain/document_loaders/slack_directory.py +++ /dev/null @@ -1,57 +0,0 @@ -import json -from pathlib import Path -from typing import Dict, List, Optional - -from langchain.docstore.document import Document -from langchain.document_loaders.base import BaseLoader - - -class SlackDirectoryLoader(BaseLoader): - """Loader that loads documents from Slack directory dump.""" - - def __init__(self, path: str, workspace_url: Optional[str] = None): - """Initialize with path and optional workspace URL. Including the URL will turn sources into links.""" - self.file_path = path - self.workspace_url = workspace_url - self.channel_id_map = self._get_channel_id_map() - - def _get_channel_id_map(self) -> Dict[str, str]: - """Get a dictionary mapping channel names to their respective IDs.""" - channels_json_path = Path(self.file_path) / "channels.json" - if channels_json_path.exists(): - with open(channels_json_path, encoding="utf-8") as f: - channels = json.load(f) - return {channel["name"]: channel["id"] for channel in channels} - return {} - - def load(self) -> List[Document]: - """Load documents.""" - channel_paths = list(Path(self.file_path).glob("*")) - docs = [] - for channel_path in channel_paths: - if channel_path.is_dir(): - channel_name = channel_path.name - json_files = list(channel_path.glob("*.json")) - for json_file in json_files: - with open(json_file, encoding='utf-8') as f: - messages = json.load(f) - for message in messages: - text = message.get("text", "") - timestamp = message.get("ts") - user = message.get("user") - if self.workspace_url: - channel_id = self.channel_id_map.get( - channel_name, "") - message_link = f"{self.workspace_url}/archives/{channel_id}/p{timestamp.replace('.', '')}" - source = message_link - else: - source = f"{channel_name} - {user} - {timestamp}" - metadata = { - "source": source, - "channel": channel_name, - "timestamp": timestamp, - "user": user, - } - docs.append( - Document(page_content=text, metadata=metadata)) - return docs