forked from Archives/langchain
This reverts commit a6f767ae7a
.
To fix the linting error.
This commit is contained in:
parent
802363eb6a
commit
2db9b7a45d
@ -1,85 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Slack export dumped locally.\n",
|
||||
"\n",
|
||||
"In order to get this Slack dump, follow these instructions:\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your own dataset\n",
|
||||
"\n",
|
||||
"Export your Slack data. You can do this by going to your Workspace Management page and clicking the Import/Export option ({your_slack_domain}.slack.com/services/export). Then, choose the right date range and click `Start export`. Slack will send you an email and a DM when the export is ready.\n",
|
||||
"\n",
|
||||
"The download will produce a `.zip` file in your Downloads folder (or wherever your downloads can be found, depending on your OS configuration).\n",
|
||||
"\n",
|
||||
"Run the following command to unzip the zip file (replace the `Export...` with your own file name as needed) or unzip using built-in tools.\n",
|
||||
"\n",
|
||||
"```shell\n",
|
||||
"unzip xxx.zip -d Slack_Exports\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Once ready, move the directory to the directory you are running this notebook from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import SlackDirectoryLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1caec59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optionally set your Slack URL. This will give you proper URLs in sources which is very convenient.\n",
|
||||
"SLACK_WORKSPACE_URL = \"https://xxx.slack.com\"\n",
|
||||
"\n",
|
||||
"loader = SlackDirectoryLoader(\"Slack_Exports\", SLACK_WORKSPACE_URL)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1c30ff7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class SlackDirectoryLoader(BaseLoader):
    """Loader that loads documents from a local Slack export directory dump."""

    def __init__(self, path: str, workspace_url: Optional[str] = None):
        """Initialize with the dump path and an optional workspace URL.

        Args:
            path: Path to the directory containing the unzipped Slack export.
            workspace_url: Base workspace URL (e.g. ``https://xxx.slack.com``).
                When given, each document's ``source`` metadata becomes a
                permalink-style URL instead of a plain
                ``"channel - user - timestamp"`` string.
        """
        self.file_path = path
        self.workspace_url = workspace_url
        # Built once up front; used by load() to resolve channel permalinks.
        self.channel_id_map = self._get_channel_id_map()

    def _get_channel_id_map(self) -> Dict[str, str]:
        """Return a mapping of channel name -> channel ID.

        Reads ``channels.json`` from the export root; returns an empty dict
        when that file is absent (e.g. a partial export), in which case
        permalinks fall back to an empty channel ID.
        """
        channels_json_path = Path(self.file_path) / "channels.json"
        if channels_json_path.exists():
            with open(channels_json_path, encoding="utf-8") as f:
                channels = json.load(f)
            return {channel["name"]: channel["id"] for channel in channels}
        return {}

    def load(self) -> List[Document]:
        """Load every message in every channel directory as a Document."""
        docs = []
        for channel_path in Path(self.file_path).glob("*"):
            # Each channel is a directory of day-partitioned *.json files;
            # skip stray top-level files such as channels.json / users.json.
            if not channel_path.is_dir():
                continue
            channel_name = channel_path.name
            for json_file in channel_path.glob("*.json"):
                with open(json_file, encoding="utf-8") as f:
                    messages = json.load(f)
                for message in messages:
                    docs.append(
                        self._message_to_document(message, channel_name)
                    )
        return docs

    def _message_to_document(self, message: Dict, channel_name: str) -> Document:
        """Convert one raw Slack message dict into a Document.

        Args:
            message: A single message object from a channel's JSON dump.
            channel_name: Name of the channel the message belongs to.
        """
        text = message.get("text", "")
        timestamp = message.get("ts")
        user = message.get("user")
        # Guard on timestamp as well: a message without "ts" would otherwise
        # crash on None.replace() when a workspace URL is configured.
        if self.workspace_url and timestamp:
            channel_id = self.channel_id_map.get(channel_name, "")
            # Slack permalinks drop the '.' from the ts: .../p<ts-without-dot>
            source = (
                f"{self.workspace_url}/archives/{channel_id}"
                f"/p{timestamp.replace('.', '')}"
            )
        else:
            source = f"{channel_name} - {user} - {timestamp}"
        metadata = {
            "source": source,
            "channel": channel_name,
            "timestamp": timestamp,
            "user": user,
        }
        return Document(page_content=text, metadata=metadata)
|
Loading…
Reference in New Issue
Block a user