From 5eec74d9a5435c671382e69412072a8725b2ec60 Mon Sep 17 00:00:00 2001
From: Leonid Ganeline <leo.gan.57@gmail.com>
Date: Mon, 10 Jul 2023 08:56:53 -0700
Subject: [PATCH] docstrings `document_loaders` 3 (#6937)

- Updated docstrings for `document_loaders`
- Mass update `"""Loader that loads` to `"""Loads`

@baskaryan  - please, review
---
 langchain/document_loaders/acreom.py          |  2 +-
 langchain/document_loaders/airbyte_json.py    |  4 +-
 langchain/document_loaders/azlyrics.py        |  4 +-
 langchain/document_loaders/bilibili.py        |  2 +-
 langchain/document_loaders/blackboard.py      |  2 +-
 langchain/document_loaders/chatgpt.py         |  3 +-
 .../document_loaders/college_confidential.py  |  4 +-
 langchain/document_loaders/epub.py            |  2 +-
 langchain/document_loaders/excel.py           |  2 +-
 langchain/document_loaders/facebook_chat.py   |  2 +-
 langchain/document_loaders/figma.py           |  2 +-
 langchain/document_loaders/gitbook.py         |  2 +-
 langchain/document_loaders/googledrive.py     |  2 +-
 langchain/document_loaders/hn.py              |  2 +-
 langchain/document_loaders/ifixit.py          |  2 +-
 langchain/document_loaders/image.py           |  2 +-
 langchain/document_loaders/mastodon.py        |  6 +--
 langchain/document_loaders/mhtml.py           |  7 +--
 langchain/document_loaders/modern_treasury.py | 10 ++++
 langchain/document_loaders/notebook.py        | 33 +++++++++++--
 langchain/document_loaders/notion.py          |  6 +--
 langchain/document_loaders/notiondb.py        |  9 +++-
 langchain/document_loaders/obsidian.py        | 13 +++--
 langchain/document_loaders/odt.py             | 12 ++++-
 langchain/document_loaders/onedrive.py        |  9 +++-
 langchain/document_loaders/onedrive_file.py   |  5 ++
 langchain/document_loaders/open_city_data.py  | 16 ++++--
 langchain/document_loaders/org_mode.py        | 10 +++-
 langchain/document_loaders/pdf.py             | 49 ++++++++++++++-----
 langchain/document_loaders/powerpoint.py      |  4 +-
 langchain/document_loaders/psychic.py         | 12 +++--
 .../document_loaders/pyspark_dataframe.py     | 12 ++++-
 langchain/document_loaders/python.py          |  5 ++
 langchain/document_loaders/readthedocs.py     |  8 +--
 .../document_loaders/recursive_url_loader.py  | 18 +++++--
 langchain/document_loaders/reddit.py          | 16 +++++-
 langchain/document_loaders/roam.py            |  6 +--
 langchain/document_loaders/rst.py             | 12 ++++-
 langchain/document_loaders/rtf.py             | 12 ++++-
 langchain/document_loaders/s3_directory.py    | 11 +++--
 langchain/document_loaders/s3_file.py         | 11 +++--
 langchain/document_loaders/sitemap.py         | 14 ++++--
 langchain/document_loaders/slack_directory.py |  2 +-
 .../document_loaders/snowflake_loader.py      |  3 +-
 langchain/document_loaders/spreedly.py        |  6 +++
 langchain/document_loaders/srt.py             |  2 +-
 langchain/document_loaders/stripe.py          |  6 +++
 langchain/document_loaders/telegram.py        | 19 +++++--
 langchain/document_loaders/tomarkdown.py      |  4 +-
 langchain/document_loaders/trello.py          |  2 +-
 langchain/document_loaders/whatsapp_chat.py   |  2 +-
 langchain/document_loaders/word_document.py   |  2 +-
 langchain/document_loaders/xml.py             |  2 +-
 langchain/document_loaders/youtube.py         |  6 +--
 54 files changed, 316 insertions(+), 105 deletions(-)

diff --git a/langchain/document_loaders/acreom.py b/langchain/document_loaders/acreom.py
index e43a72751c..b157c4d4e7 100644
--- a/langchain/document_loaders/acreom.py
+++ b/langchain/document_loaders/acreom.py
@@ -1,4 +1,4 @@
-"""Loader that loads acreom vault from a directory."""
+"""Loads acreom vault from a directory."""
 import re
 from pathlib import Path
 from typing import Iterator, List
diff --git a/langchain/document_loaders/airbyte_json.py b/langchain/document_loaders/airbyte_json.py
index f92b4e408c..b9033b39f2 100644
--- a/langchain/document_loaders/airbyte_json.py
+++ b/langchain/document_loaders/airbyte_json.py
@@ -1,4 +1,4 @@
-"""Loader that loads local airbyte json files."""
+"""Loads local airbyte json files."""
 import json
 from typing import List
 
@@ -8,7 +8,7 @@ from langchain.utils import stringify_dict
 
 
 class AirbyteJSONLoader(BaseLoader):
-    """Loader that loads local airbyte json files."""
+    """Loads local airbyte json files."""
 
     def __init__(self, file_path: str):
         """Initialize with a file path. This should start with '/tmp/airbyte_local/'."""
diff --git a/langchain/document_loaders/azlyrics.py b/langchain/document_loaders/azlyrics.py
index 219ea5a873..d7b6fe28ba 100644
--- a/langchain/document_loaders/azlyrics.py
+++ b/langchain/document_loaders/azlyrics.py
@@ -1,4 +1,4 @@
-"""Loader that loads AZLyrics."""
+"""Loads AZLyrics."""
 from typing import List
 
 from langchain.docstore.document import Document
@@ -6,7 +6,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
 
 
 class AZLyricsLoader(WebBaseLoader):
-    """Loader that loads AZLyrics webpages."""
+    """Loads AZLyrics webpages."""
 
     def load(self) -> List[Document]:
         """Load webpages into Documents."""
diff --git a/langchain/document_loaders/bilibili.py b/langchain/document_loaders/bilibili.py
index d7fe3c8062..d3269de0c6 100644
--- a/langchain/document_loaders/bilibili.py
+++ b/langchain/document_loaders/bilibili.py
@@ -10,7 +10,7 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class BiliBiliLoader(BaseLoader):
-    """Loader that loads bilibili transcripts."""
+    """Loads bilibili transcripts."""
 
     def __init__(self, video_urls: List[str]):
         """Initialize with bilibili url.
diff --git a/langchain/document_loaders/blackboard.py b/langchain/document_loaders/blackboard.py
index 2564a648dc..0ab6ca11be 100644
--- a/langchain/document_loaders/blackboard.py
+++ b/langchain/document_loaders/blackboard.py
@@ -1,4 +1,4 @@
-"""Loader that loads all documents from a blackboard course."""
+"""Loads all documents from a blackboard course."""
 import contextlib
 import re
 from pathlib import Path
diff --git a/langchain/document_loaders/chatgpt.py b/langchain/document_loaders/chatgpt.py
index d281b2ee13..5dbb90585f 100644
--- a/langchain/document_loaders/chatgpt.py
+++ b/langchain/document_loaders/chatgpt.py
@@ -1,3 +1,4 @@
+"""Load conversations from ChatGPT data export."""
 import datetime
 import json
 from typing import List
@@ -31,7 +32,7 @@ class ChatGPTLoader(BaseLoader):
     """Load conversations from exported ChatGPT data."""
 
     def __init__(self, log_file: str, num_logs: int = -1):
-        """
+        """Initialize a class object.
 
         Args:
             log_file: Path to the log file
diff --git a/langchain/document_loaders/college_confidential.py b/langchain/document_loaders/college_confidential.py
index 6094046984..5763fdcdf0 100644
--- a/langchain/document_loaders/college_confidential.py
+++ b/langchain/document_loaders/college_confidential.py
@@ -1,4 +1,4 @@
-"""Loader that loads College Confidential."""
+"""Loads College Confidential."""
 from typing import List
 
 from langchain.docstore.document import Document
@@ -6,7 +6,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
 
 
 class CollegeConfidentialLoader(WebBaseLoader):
-    """Loader that loads College Confidential webpages."""
+    """Loads College Confidential webpages."""
 
     def load(self) -> List[Document]:
         """Load webpages as Documents."""
diff --git a/langchain/document_loaders/epub.py b/langchain/document_loaders/epub.py
index 91ea1a14a9..f99f0a9d7b 100644
--- a/langchain/document_loaders/epub.py
+++ b/langchain/document_loaders/epub.py
@@ -1,4 +1,4 @@
-"""Loader that loads EPub files."""
+"""Loads EPub files."""
 from typing import List
 
 from langchain.document_loaders.unstructured import (
diff --git a/langchain/document_loaders/excel.py b/langchain/document_loaders/excel.py
index 946430f0c6..619a082f5b 100644
--- a/langchain/document_loaders/excel.py
+++ b/langchain/document_loaders/excel.py
@@ -1,4 +1,4 @@
-"""Loader that loads Microsoft Excel files."""
+"""Loads Microsoft Excel files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
diff --git a/langchain/document_loaders/facebook_chat.py b/langchain/document_loaders/facebook_chat.py
index 53ec5b216a..58de558a81 100644
--- a/langchain/document_loaders/facebook_chat.py
+++ b/langchain/document_loaders/facebook_chat.py
@@ -1,4 +1,4 @@
-"""Loader that loads Facebook chat json dump."""
+"""Loads Facebook chat json dump."""
 import datetime
 import json
 from pathlib import Path
diff --git a/langchain/document_loaders/figma.py b/langchain/document_loaders/figma.py
index 37b842719b..ff67c9c45c 100644
--- a/langchain/document_loaders/figma.py
+++ b/langchain/document_loaders/figma.py
@@ -1,4 +1,4 @@
-"""Loader that loads Figma files json dump."""
+"""Loads Figma files json dump."""
 import json
 import urllib.request
 from typing import Any, List
diff --git a/langchain/document_loaders/gitbook.py b/langchain/document_loaders/gitbook.py
index f0bf88d757..e293c70c0a 100644
--- a/langchain/document_loaders/gitbook.py
+++ b/langchain/document_loaders/gitbook.py
@@ -1,4 +1,4 @@
-"""Loader that loads GitBook."""
+"""Loads GitBook."""
 from typing import Any, List, Optional
 from urllib.parse import urljoin, urlparse
 
diff --git a/langchain/document_loaders/googledrive.py b/langchain/document_loaders/googledrive.py
index cc4b3f1d6b..4538b469d0 100644
--- a/langchain/document_loaders/googledrive.py
+++ b/langchain/document_loaders/googledrive.py
@@ -1,4 +1,4 @@
-"""Loader that loads data from Google Drive."""
+"""Loads data from Google Drive."""
 
 # Prerequisites:
 # 1. Create a Google Cloud project
diff --git a/langchain/document_loaders/hn.py b/langchain/document_loaders/hn.py
index d786285f70..34b284648b 100644
--- a/langchain/document_loaders/hn.py
+++ b/langchain/document_loaders/hn.py
@@ -1,4 +1,4 @@
-"""Loader that loads Hacker News."""
+"""Loads Hacker News."""
 from typing import Any, List
 
 from langchain.docstore.document import Document
diff --git a/langchain/document_loaders/ifixit.py b/langchain/document_loaders/ifixit.py
index 8e3f42c8cd..1669dace99 100644
--- a/langchain/document_loaders/ifixit.py
+++ b/langchain/document_loaders/ifixit.py
@@ -1,4 +1,4 @@
-"""Loader that loads iFixit data."""
+"""Loads iFixit data."""
 from typing import List, Optional
 
 import requests
diff --git a/langchain/document_loaders/image.py b/langchain/document_loaders/image.py
index 9732495d42..6954d04dea 100644
--- a/langchain/document_loaders/image.py
+++ b/langchain/document_loaders/image.py
@@ -1,4 +1,4 @@
-"""Loader that loads image files."""
+"""Loads image files."""
 from typing import List
 
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
diff --git a/langchain/document_loaders/mastodon.py b/langchain/document_loaders/mastodon.py
index ef64cf463a..ae353790cd 100644
--- a/langchain/document_loaders/mastodon.py
+++ b/langchain/document_loaders/mastodon.py
@@ -37,13 +37,13 @@ class MastodonTootsLoader(BaseLoader):
 
         Args:
             mastodon_accounts: The list of Mastodon accounts to query.
-            number_toots: How many toots to pull for each account. Default is 100.
+            number_toots: How many toots to pull for each account. Defaults to 100.
             exclude_replies: Whether to exclude reply toots from the load.
-                Default is False.
+                Defaults to False.
             access_token: An access token if toots are loaded as a Mastodon app. Can
                 also be specified via the environment variables "MASTODON_ACCESS_TOKEN".
             api_base_url: A Mastodon API base URL to talk to, if not using the default.
-                Default is "https://mastodon.social".
+                Defaults to "https://mastodon.social".
         """
         mastodon = _dependable_mastodon_import()
         access_token = access_token or os.environ.get("MASTODON_ACCESS_TOKEN")
diff --git a/langchain/document_loaders/mhtml.py b/langchain/document_loaders/mhtml.py
index 6f1a4699ea..4def89a2dd 100644
--- a/langchain/document_loaders/mhtml.py
+++ b/langchain/document_loaders/mhtml.py
@@ -24,10 +24,11 @@ class MHTMLLoader(BaseLoader):
         to pass to the BeautifulSoup object.
 
         Args:
-            file_path: The path to the file to load.
+            file_path: Path to file to load.
             open_encoding: The encoding to use when opening the file.
-            bs_kwargs: soup kwargs to pass to the BeautifulSoup object.
-            get_text_separator: The separator to use when getting text from the soup.
+            bs_kwargs: Any kwargs to pass to the BeautifulSoup object.
+            get_text_separator: The separator to use when getting the text
+                from the soup.
         """
         try:
             import bs4  # noqa:F401
diff --git a/langchain/document_loaders/modern_treasury.py b/langchain/document_loaders/modern_treasury.py
index d981d330f2..2d12d6bd8e 100644
--- a/langchain/document_loaders/modern_treasury.py
+++ b/langchain/document_loaders/modern_treasury.py
@@ -35,6 +35,16 @@ class ModernTreasuryLoader(BaseLoader):
         organization_id: Optional[str] = None,
         api_key: Optional[str] = None,
     ) -> None:
+        """Initialize with a resource, an organization ID, and an API key.
+
+        Args:
+            resource: The Modern Treasury resource to load.
+            organization_id: The Modern Treasury organization ID. It can also be
+               specified via the environment variable
+               "MODERN_TREASURY_ORGANIZATION_ID".
+            api_key: The Modern Treasury API key. It can also be specified via
+               the environment variable "MODERN_TREASURY_API_KEY".
+        """
         self.resource = resource
         organization_id = organization_id or get_from_env(
             "organization_id", "MODERN_TREASURY_ORGANIZATION_ID"
diff --git a/langchain/document_loaders/notebook.py b/langchain/document_loaders/notebook.py
index afc572cb35..19522c2a5c 100644
--- a/langchain/document_loaders/notebook.py
+++ b/langchain/document_loaders/notebook.py
@@ -1,4 +1,4 @@
-"""Loader that loads .ipynb notebook files."""
+"""Loads .ipynb notebook files."""
 import json
 from pathlib import Path
 from typing import Any, List
@@ -10,7 +10,18 @@ from langchain.document_loaders.base import BaseLoader
 def concatenate_cells(
     cell: dict, include_outputs: bool, max_output_length: int, traceback: bool
 ) -> str:
-    """Combine cells information in a readable format ready to be used."""
+    """Combine cells information in a readable format ready to be used.
+
+    Args:
+        cell: A dictionary describing a single notebook cell.
+        include_outputs: Whether to include the outputs of the cell.
+        max_output_length: Maximum length of the output to be displayed.
+        traceback: Whether to return a traceback of the error.
+
+    Returns:
+        A string with the cell information.
+
+    """
     cell_type = cell["cell_type"]
     source = cell["source"]
     output = cell["outputs"]
@@ -45,7 +56,7 @@ def concatenate_cells(
 
 
 def remove_newlines(x: Any) -> Any:
-    """Remove recursively newlines, no matter the data structure they are stored in."""
+    """Recursively removes newlines, no matter the data structure they are stored in."""
     import pandas as pd
 
     if isinstance(x, str):
@@ -59,7 +70,7 @@ def remove_newlines(x: Any) -> Any:
 
 
 class NotebookLoader(BaseLoader):
-    """Loader that loads .ipynb notebook files."""
+    """Loads .ipynb notebook files."""
 
     def __init__(
         self,
@@ -69,7 +80,19 @@ class NotebookLoader(BaseLoader):
         remove_newline: bool = False,
         traceback: bool = False,
     ):
-        """Initialize with path."""
+        """Initialize with path.
+
+        Args:
+            path: The path to load the notebook from.
+            include_outputs: Whether to include the outputs of the cell.
+                Defaults to False.
+            max_output_length: Maximum length of the output to be displayed.
+                Defaults to 10.
+            remove_newline: Whether to remove newlines from the notebook.
+                Defaults to False.
+            traceback: Whether to return a traceback of the error.
+                Defaults to False.
+        """
         self.file_path = path
         self.include_outputs = include_outputs
         self.max_output_length = max_output_length
diff --git a/langchain/document_loaders/notion.py b/langchain/document_loaders/notion.py
index f5d83bf9ee..4801052558 100644
--- a/langchain/document_loaders/notion.py
+++ b/langchain/document_loaders/notion.py
@@ -1,4 +1,4 @@
-"""Loader that loads Notion directory dump."""
+"""Loads Notion directory dump."""
 from pathlib import Path
 from typing import List
 
@@ -7,10 +7,10 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class NotionDirectoryLoader(BaseLoader):
-    """Loader that loads Notion directory dump."""
+    """Loads Notion directory dump."""
 
     def __init__(self, path: str):
-        """Initialize with path."""
+        """Initialize with a file path."""
         self.file_path = path
 
     def load(self) -> List[Document]:
diff --git a/langchain/document_loaders/notiondb.py b/langchain/document_loaders/notiondb.py
index 9a666eeab5..6a09668466 100644
--- a/langchain/document_loaders/notiondb.py
+++ b/langchain/document_loaders/notiondb.py
@@ -15,11 +15,12 @@ BLOCK_URL = NOTION_BASE_URL + "/blocks/{block_id}/children"
 
 class NotionDBLoader(BaseLoader):
     """Notion DB Loader.
-    Reads content from pages within a Noton Database.
+    Reads content from pages within a Notion Database.
     Args:
         integration_token (str): Notion integration token.
         database_id (str): Notion database id.
         request_timeout_sec (int): Timeout for Notion requests in seconds.
+            Defaults to 10.
     """
 
     def __init__(
@@ -75,7 +76,11 @@ class NotionDBLoader(BaseLoader):
         return pages
 
     def load_page(self, page_summary: Dict[str, Any]) -> Document:
-        """Read a page."""
+        """Read a page.
+
+        Args:
+            page_summary: Page summary from Notion API.
+        """
         page_id = page_summary["id"]
 
         # load properties as metadata
diff --git a/langchain/document_loaders/obsidian.py b/langchain/document_loaders/obsidian.py
index cee046e954..fd43b07543 100644
--- a/langchain/document_loaders/obsidian.py
+++ b/langchain/document_loaders/obsidian.py
@@ -1,4 +1,4 @@
-"""Loader that loads Obsidian directory dump."""
+"""Loads Obsidian directory dump."""
 import re
 from pathlib import Path
 from typing import List
@@ -8,14 +8,21 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class ObsidianLoader(BaseLoader):
-    """Loader that loads Obsidian files from disk."""
+    """Loads Obsidian files from disk."""
 
     FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)
 
     def __init__(
         self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True
     ):
-        """Initialize with path."""
+        """Initialize with a path.
+
+        Args:
+            path: Path to the directory containing the Obsidian files.
+            encoding: Charset encoding. Defaults to "UTF-8".
+            collect_metadata: Whether to collect metadata from the front matter.
+                Defaults to True.
+        """
         self.file_path = path
         self.encoding = encoding
         self.collect_metadata = collect_metadata
diff --git a/langchain/document_loaders/odt.py b/langchain/document_loaders/odt.py
index b8eedb314d..63685e10f1 100644
--- a/langchain/document_loaders/odt.py
+++ b/langchain/document_loaders/odt.py
@@ -1,4 +1,4 @@
-"""Loader that loads Open Office ODT files."""
+"""Loads OpenOffice ODT files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -8,11 +8,19 @@ from langchain.document_loaders.unstructured import (
 
 
 class UnstructuredODTLoader(UnstructuredFileLoader):
-    """Loader that uses unstructured to load open office ODT files."""
+    """Loader that uses unstructured to load OpenOffice ODT files."""
 
     def __init__(
         self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
     ):
+        """Initialize with a file path.
+
+        Args:
+            file_path: The path to the file to load.
+            mode: The mode to use when loading the file, e.g. "single" or
+                "elements". Defaults to "single".
+            **unstructured_kwargs: Any kwargs to pass to unstructured.
+        """
         validate_unstructured_version(min_unstructured_version="0.6.3")
         super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
 
diff --git a/langchain/document_loaders/onedrive.py b/langchain/document_loaders/onedrive.py
index 4129f80f50..d5f2fedfc6 100644
--- a/langchain/document_loaders/onedrive.py
+++ b/langchain/document_loaders/onedrive.py
@@ -1,4 +1,4 @@
-"""Loader that loads data from OneDrive"""
+"""Loads data from OneDrive"""
 from __future__ import annotations
 
 import logging
@@ -60,11 +60,18 @@ class _SupportedFileTypes(BaseModel):
 
 
 class OneDriveLoader(BaseLoader, BaseModel):
+    """Loads data from OneDrive."""
+
     settings: _OneDriveSettings = Field(default_factory=_OneDriveSettings)
+    """ The settings for the OneDrive API client."""
     drive_id: str = Field(...)
+    """ The ID of the OneDrive drive to load data from."""
     folder_path: Optional[str] = None
+    """ The path to the folder to load data from."""
     object_ids: Optional[List[str]] = None
+    """ The IDs of the objects to load data from."""
     auth_with_token: bool = False
+    """ Whether to authenticate with a token or not. Defaults to False."""
 
     def _auth(self) -> Type[Account]:
         """
diff --git a/langchain/document_loaders/onedrive_file.py b/langchain/document_loaders/onedrive_file.py
index 8bdf157371..c83a216196 100644
--- a/langchain/document_loaders/onedrive_file.py
+++ b/langchain/document_loaders/onedrive_file.py
@@ -16,10 +16,15 @@ CHUNK_SIZE = 1024 * 1024 * 5
 
 
 class OneDriveFileLoader(BaseLoader, BaseModel):
+    """Loads a file from OneDrive."""
+
     file: File = Field(...)
+    """The file to load."""
 
     class Config:
         arbitrary_types_allowed = True
+        """Allow arbitrary types, which is needed for the File type (off by default).
+        See https://pydantic-docs.helpmanual.io/usage/types/#arbitrary-types-allowed"""
 
     def load(self) -> List[Document]:
         """Load Documents"""
diff --git a/langchain/document_loaders/open_city_data.py b/langchain/document_loaders/open_city_data.py
index a5af89b41c..03801d7995 100644
--- a/langchain/document_loaders/open_city_data.py
+++ b/langchain/document_loaders/open_city_data.py
@@ -5,13 +5,19 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class OpenCityDataLoader(BaseLoader):
-    """Loader that loads Open city data."""
+    """Loads Open City data."""
 
     def __init__(self, city_id: str, dataset_id: str, limit: int):
-        """Initialize with dataset_id"""
-        """ Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6 """
-        """ e.g., city_id = data.sfgov.org """
-        """ e.g., dataset_id = vw6y-z8j6 """
+        """Initialize with dataset_id.
+        Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6
+        e.g., city_id = data.sfgov.org
+        e.g., dataset_id = vw6y-z8j6
+
+        Args:
+            city_id: The Open City city identifier.
+            dataset_id: The Open City dataset identifier.
+            limit: The maximum number of documents to load.
+        """
         self.city_id = city_id
         self.dataset_id = dataset_id
         self.limit = limit
diff --git a/langchain/document_loaders/org_mode.py b/langchain/document_loaders/org_mode.py
index 15c4a2c090..022cfca02b 100644
--- a/langchain/document_loaders/org_mode.py
+++ b/langchain/document_loaders/org_mode.py
@@ -1,4 +1,4 @@
-"""Loader that loads Org-Mode files."""
+"""Loads Org-Mode files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -13,6 +13,14 @@ class UnstructuredOrgModeLoader(UnstructuredFileLoader):
     def __init__(
         self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
     ):
+        """Initialize with a file path.
+
+        Args:
+            file_path: The path to the file to load.
+            mode: The mode to use when loading the file. Defaults to "single".
+            **unstructured_kwargs: Any additional keyword arguments to pass
+                to unstructured.
+        """
         validate_unstructured_version(min_unstructured_version="0.7.9")
         super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
 
diff --git a/langchain/document_loaders/pdf.py b/langchain/document_loaders/pdf.py
index 8dc035a44f..f8a5a3bcc9 100644
--- a/langchain/document_loaders/pdf.py
+++ b/langchain/document_loaders/pdf.py
@@ -1,4 +1,4 @@
-"""Loader that loads PDF files."""
+"""Loads PDF files."""
 import json
 import logging
 import os
@@ -41,11 +41,11 @@ class BasePDFLoader(BaseLoader, ABC):
     """Base loader class for PDF files.
 
     Defaults to check for local file, but if the file is a web path, it will download it
-    to a temporary file, and use that, then clean up the temporary file after completion
+    to a temporary file, use it, then clean up the temporary file after completion
     """
 
     def __init__(self, file_path: str):
-        """Initialize with file path."""
+        """Initialize with a file path."""
         self.file_path = file_path
         self.web_path = None
         if "~" in self.file_path:
@@ -86,7 +86,7 @@ class BasePDFLoader(BaseLoader, ABC):
 
 
 class OnlinePDFLoader(BasePDFLoader):
-    """Loader that loads online PDFs."""
+    """Loads online PDFs."""
 
     def load(self) -> List[Document]:
         """Load documents."""
@@ -97,13 +97,13 @@ class OnlinePDFLoader(BasePDFLoader):
 class PyPDFLoader(BasePDFLoader):
     """Loads a PDF with pypdf and chunks at character level.
 
-    Loader also stores page numbers in metadatas.
+    Loader also stores page numbers in metadata.
     """
 
     def __init__(
         self, file_path: str, password: Optional[Union[str, bytes]] = None
     ) -> None:
-        """Initialize with file path."""
+        """Initialize with a file path."""
         try:
             import pypdf  # noqa:F401
         except ImportError:
@@ -129,7 +129,7 @@ class PyPDFium2Loader(BasePDFLoader):
     """Loads a PDF with pypdfium2 and chunks at character level."""
 
     def __init__(self, file_path: str):
-        """Initialize with file path."""
+        """Initialize with a file path."""
         super().__init__(file_path)
         self.parser = PyPDFium2Parser()
 
@@ -148,7 +148,7 @@ class PyPDFium2Loader(BasePDFLoader):
 class PyPDFDirectoryLoader(BaseLoader):
     """Loads a directory with PDF files with pypdf and chunks at character level.
 
-    Loader also stores page numbers in metadatas.
+    Loader also stores page numbers in metadata.
     """
 
     def __init__(
@@ -222,7 +222,7 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader):
     """Loader that uses PDFMiner to load PDF files as HTML content."""
 
     def __init__(self, file_path: str):
-        """Initialize with file path."""
+        """Initialize with a file path."""
         try:
             from pdfminer.high_level import extract_text_to_fp  # noqa:F401
         except ImportError:
@@ -256,7 +256,7 @@ class PyMuPDFLoader(BasePDFLoader):
     """Loader that uses PyMuPDF to load PDF files."""
 
     def __init__(self, file_path: str) -> None:
-        """Initialize with file path."""
+        """Initialize with a file path."""
         try:
             import fitz  # noqa:F401
         except ImportError:
@@ -278,6 +278,8 @@ class PyMuPDFLoader(BasePDFLoader):
 # MathpixPDFLoader implementation taken largely from Daniel Gross's:
 # https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
 class MathpixPDFLoader(BasePDFLoader):
+    """This class uses Mathpix service to load PDF files."""
+
     def __init__(
         self,
         file_path: str,
@@ -286,6 +288,16 @@ class MathpixPDFLoader(BasePDFLoader):
         should_clean_pdf: bool = False,
         **kwargs: Any,
     ) -> None:
+        """Initialize with a file path.
+
+        Args:
+            file_path: Path to the PDF file to load.
+            processed_file_format: Format of the processed file. Defaults to "mmd".
+            max_wait_time_seconds: Maximum time to wait for the response from
+                the server. Defaults to 500.
+            should_clean_pdf: Whether to clean the PDF file. Defaults to False.
+            **kwargs: additional keyword arguments.
+        """
         super().__init__(file_path)
         self.mathpix_api_key = get_from_dict_or_env(
             kwargs, "mathpix_api_key", "MATHPIX_API_KEY"
@@ -324,6 +336,13 @@ class MathpixPDFLoader(BasePDFLoader):
             raise ValueError("Unable to send PDF to Mathpix.")
 
     def wait_for_processing(self, pdf_id: str) -> None:
+        """Wait for processing to complete.
+
+        Args:
+            pdf_id: a PDF id.
+
+        Returns: None
+        """
         url = self.url + "/" + pdf_id
         for _ in range(0, self.max_wait_time_seconds, 5):
             response = requests.get(url, headers=self.headers)
@@ -346,6 +365,14 @@ class MathpixPDFLoader(BasePDFLoader):
         return response.content.decode("utf-8")
 
     def clean_pdf(self, contents: str) -> str:
+        """Clean the PDF file.
+
+        Args:
+            contents: a PDF file contents.
+
+        Returns:
+            The cleaned contents.
+        """
         contents = "\n".join(
             [line for line in contents.split("\n") if not line.startswith("![]")]
         )
@@ -375,7 +402,7 @@ class PDFPlumberLoader(BasePDFLoader):
     def __init__(
         self, file_path: str, text_kwargs: Optional[Mapping[str, Any]] = None
     ) -> None:
-        """Initialize with file path."""
+        """Initialize with a file path."""
         try:
             import pdfplumber  # noqa:F401
         except ImportError:
diff --git a/langchain/document_loaders/powerpoint.py b/langchain/document_loaders/powerpoint.py
index be6e67ab2f..7a00501e67 100644
--- a/langchain/document_loaders/powerpoint.py
+++ b/langchain/document_loaders/powerpoint.py
@@ -1,4 +1,4 @@
-"""Loader that loads powerpoint files."""
+"""Loads PowerPoint files."""
 import os
 from typing import List
 
@@ -6,7 +6,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
 
 
 class UnstructuredPowerPointLoader(UnstructuredFileLoader):
-    """Loader that uses unstructured to load powerpoint files."""
+    """Loader that uses unstructured to load PowerPoint files."""
 
     def _get_elements(self) -> List:
         from unstructured.__version__ import __version__ as __unstructured_version__
diff --git a/langchain/document_loaders/psychic.py b/langchain/document_loaders/psychic.py
index 43346510e4..d1adee8e3c 100644
--- a/langchain/document_loaders/psychic.py
+++ b/langchain/document_loaders/psychic.py
@@ -1,4 +1,4 @@
-"""Loader that loads documents from Psychic.dev."""
+"""Loads documents from Psychic.dev."""
 from typing import List, Optional
 
 from langchain.docstore.document import Document
@@ -6,12 +6,18 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class PsychicLoader(BaseLoader):
-    """Loader that loads documents from Psychic.dev."""
+    """Loads documents from Psychic.dev."""
 
     def __init__(
         self, api_key: str, account_id: str, connector_id: Optional[str] = None
     ):
-        """Initialize with API key, connector id, and account id."""
+        """Initialize with API key, connector id, and account id.
+
+        Args:
+            api_key: The Psychic API key.
+            account_id: The Psychic account id.
+            connector_id: The Psychic connector id.
+        """
 
         try:
             from psychicapi import ConnectorId, Psychic  # noqa: F401
diff --git a/langchain/document_loaders/pyspark_dataframe.py b/langchain/document_loaders/pyspark_dataframe.py
index c1f186cd34..490926fb10 100644
--- a/langchain/document_loaders/pyspark_dataframe.py
+++ b/langchain/document_loaders/pyspark_dataframe.py
@@ -23,7 +23,15 @@ class PySparkDataFrameLoader(BaseLoader):
         page_content_column: str = "text",
         fraction_of_memory: float = 0.1,
     ):
-        """Initialize with a Spark DataFrame object."""
+        """Initialize with a Spark DataFrame object.
+
+        Args:
+            spark_session: The SparkSession object.
+            df: The Spark DataFrame object.
+            page_content_column: The name of the column containing the page content.
+                Defaults to "text".
+            fraction_of_memory: The fraction of memory to use. Defaults to 0.1.
+        """
         try:
             from pyspark.sql import DataFrame, SparkSession
         except ImportError:
@@ -48,7 +56,7 @@ class PySparkDataFrameLoader(BaseLoader):
         self.column_names = self.df.columns
 
     def get_num_rows(self) -> Tuple[int, int]:
-        """Gets the amount of "feasible" rows for the DataFrame"""
+        """Gets the number of "feasible" rows for the DataFrame."""
         try:
             import psutil
         except ImportError as e:
diff --git a/langchain/document_loaders/python.py b/langchain/document_loaders/python.py
index 65487323f2..e8e238afc6 100644
--- a/langchain/document_loaders/python.py
+++ b/langchain/document_loaders/python.py
@@ -9,6 +9,11 @@ class PythonLoader(TextLoader):
     """
 
     def __init__(self, file_path: str):
+        """Initialize with a file path.
+
+        Args:
+            file_path: The path to the file to load.
+        """
         with open(file_path, "rb") as f:
             encoding, _ = tokenize.detect_encoding(f.readline)
         super().__init__(file_path=file_path, encoding=encoding)
diff --git a/langchain/document_loaders/readthedocs.py b/langchain/document_loaders/readthedocs.py
index 979b4ae173..219364bd7d 100644
--- a/langchain/document_loaders/readthedocs.py
+++ b/langchain/document_loaders/readthedocs.py
@@ -1,4 +1,4 @@
-"""Loader that loads ReadTheDocs documentation directory dump."""
+"""Loads ReadTheDocs documentation directory dump."""
 from pathlib import Path
 from typing import Any, List, Optional, Tuple, Union
 
@@ -7,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class ReadTheDocsLoader(BaseLoader):
-    """Loader that loads ReadTheDocs documentation directory dump."""
+    """Loads ReadTheDocs documentation directory dump."""
 
     def __init__(
         self,
@@ -20,7 +20,7 @@ class ReadTheDocsLoader(BaseLoader):
         """
         Initialize ReadTheDocsLoader
 
-        The loader loops over all files under `path` and extract the actual content of
+        The loader loops over all files under `path` and extracts the actual content of
         the files by retrieving main html tags. Default main html tags include
         `<main id="main-content>`, <`div role="main>`, and `<article role="main">`. You
         can also define your own html tags by passing custom_html_tag, e.g.
@@ -31,7 +31,7 @@ class ReadTheDocsLoader(BaseLoader):
         Args:
             path: The location of pulled readthedocs folder.
             encoding: The encoding with which to open the documents.
-            errors: Specifies how encoding and decoding errors are to be handled—this
+            errors: Specify how encoding and decoding errors are to be handled—this
                 cannot be used in binary mode.
             custom_html_tag: Optional custom html tag to retrieve the content from
                 files.
diff --git a/langchain/document_loaders/recursive_url_loader.py b/langchain/document_loaders/recursive_url_loader.py
index b1a0250d74..7462d85888 100644
--- a/langchain/document_loaders/recursive_url_loader.py
+++ b/langchain/document_loaders/recursive_url_loader.py
@@ -8,17 +8,27 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class RecursiveUrlLoader(BaseLoader):
-    """Loader that loads all child links from a given url."""
+    """Loads all child links from a given url."""
 
     def __init__(self, url: str, exclude_dirs: Optional[str] = None) -> None:
-        """Initialize with URL to crawl and any sub-directories to exclude."""
+        """Initialize with URL to crawl and any subdirectories to exclude.
+
+        Args:
+            url: The URL to crawl.
+            exclude_dirs: A list of subdirectories to exclude.
+        """
         self.url = url
         self.exclude_dirs = exclude_dirs
 
     def get_child_links_recursive(
         self, url: str, visited: Optional[Set[str]] = None
     ) -> Set[str]:
-        """Recursively get all child links starting with the path of the input URL."""
+        """Recursively get all child links starting with the path of the input URL.
+
+        Args:
+            url: The URL to crawl.
+            visited: A set of visited URLs.
+        """
 
         try:
             from bs4 import BeautifulSoup
@@ -39,7 +49,7 @@ class RecursiveUrlLoader(BaseLoader):
         if not parent_url.endswith("/"):
             parent_url += "/"
 
-        # Exclude the root and parent from list
+        # Exclude the root and parent from the list
         visited = set() if visited is None else visited
 
         # Exclude the links that start with any of the excluded directories
diff --git a/langchain/document_loaders/reddit.py b/langchain/document_loaders/reddit.py
index 80e9fbb599..22fd112db1 100644
--- a/langchain/document_loaders/reddit.py
+++ b/langchain/document_loaders/reddit.py
@@ -23,7 +23,7 @@ def _dependable_praw_import() -> praw:
 class RedditPostsLoader(BaseLoader):
     """Reddit posts loader.
     Read posts on a subreddit.
-    First you need to go to
+    First, you need to go to
     https://www.reddit.com/prefs/apps/
     and create your application
     """
@@ -38,6 +38,20 @@ class RedditPostsLoader(BaseLoader):
         categories: Sequence[str] = ["new"],
         number_posts: Optional[int] = 10,
     ):
+        """
+        Initialize with client_id, client_secret, user_agent, search_queries, mode,
+            categories, number_posts.
+        Example: https://www.reddit.com/r/learnpython/
+
+        Args:
+            client_id: Reddit client id.
+            client_secret: Reddit client secret.
+            user_agent: Reddit user agent.
+            search_queries: The search queries.
+            mode: The mode.
+            categories: The categories. Default: ["new"]
+            number_posts: The number of posts. Default: 10
+        """
         self.client_id = client_id
         self.client_secret = client_secret
         self.user_agent = user_agent
diff --git a/langchain/document_loaders/roam.py b/langchain/document_loaders/roam.py
index ff06885764..136bc116d0 100644
--- a/langchain/document_loaders/roam.py
+++ b/langchain/document_loaders/roam.py
@@ -1,4 +1,4 @@
-"""Loader that loads Roam directory dump."""
+"""Loads Roam directory dump."""
 from pathlib import Path
 from typing import List
 
@@ -7,10 +7,10 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class RoamLoader(BaseLoader):
-    """Loader that loads Roam files from disk."""
+    """Loads Roam files from disk."""
 
     def __init__(self, path: str):
-        """Initialize with path."""
+        """Initialize with a path."""
         self.file_path = path
 
     def load(self) -> List[Document]:
diff --git a/langchain/document_loaders/rst.py b/langchain/document_loaders/rst.py
index 9b20e7bab4..abff302227 100644
--- a/langchain/document_loaders/rst.py
+++ b/langchain/document_loaders/rst.py
@@ -1,4 +1,4 @@
-"""Loader that loads RST files."""
+"""Loads RST files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -13,6 +13,16 @@ class UnstructuredRSTLoader(UnstructuredFileLoader):
     def __init__(
         self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
     ):
+        """
+        Initialize with a file path.
+
+        Args:
+            file_path: The path to the file to load.
+            mode: The mode to use for partitioning. See unstructured for details.
+                Defaults to "single".
+            **unstructured_kwargs: Additional keyword arguments to pass
+                to unstructured.
+        """
         validate_unstructured_version(min_unstructured_version="0.7.5")
         super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
 
diff --git a/langchain/document_loaders/rtf.py b/langchain/document_loaders/rtf.py
index c4113be206..3536cd3791 100644
--- a/langchain/document_loaders/rtf.py
+++ b/langchain/document_loaders/rtf.py
@@ -1,4 +1,4 @@
-"""Loader that loads rich text files."""
+"""Loads rich text files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
@@ -13,6 +13,16 @@ class UnstructuredRTFLoader(UnstructuredFileLoader):
     def __init__(
         self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
     ):
+        """
+        Initialize with a file path.
+
+        Args:
+            file_path: The path to the file to load.
+            mode: The mode to use for partitioning. See unstructured for details.
+                Defaults to "single".
+            **unstructured_kwargs: Additional keyword arguments to pass
+                to unstructured.
+        """
         min_unstructured_version = "0.5.12"
         if not satisfies_min_unstructured_version(min_unstructured_version):
             raise ValueError(
diff --git a/langchain/document_loaders/s3_directory.py b/langchain/document_loaders/s3_directory.py
index 0c842d0523..60085ee904 100644
--- a/langchain/document_loaders/s3_directory.py
+++ b/langchain/document_loaders/s3_directory.py
@@ -1,4 +1,4 @@
-"""Loading logic for loading documents from an s3 directory."""
+"""Loading logic for loading documents from an AWS S3 directory."""
 from typing import List
 
 from langchain.docstore.document import Document
@@ -7,10 +7,15 @@ from langchain.document_loaders.s3_file import S3FileLoader
 
 
 class S3DirectoryLoader(BaseLoader):
-    """Loading logic for loading documents from s3."""
+    """Loading logic for loading documents from an AWS S3 directory."""
 
     def __init__(self, bucket: str, prefix: str = ""):
-        """Initialize with bucket and key name."""
+        """Initialize with bucket and key name.
+
+        Args:
+            bucket: The name of the S3 bucket.
+            prefix: The prefix of the S3 key. Defaults to "".
+        """
         self.bucket = bucket
         self.prefix = prefix
 
diff --git a/langchain/document_loaders/s3_file.py b/langchain/document_loaders/s3_file.py
index 246b0095b5..28195d8fe6 100644
--- a/langchain/document_loaders/s3_file.py
+++ b/langchain/document_loaders/s3_file.py
@@ -1,4 +1,4 @@
-"""Loading logic for loading documents from an s3 file."""
+"""Loading logic for loading documents from an AWS S3 file."""
 import os
 import tempfile
 from typing import List
@@ -9,10 +9,15 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
 
 
 class S3FileLoader(BaseLoader):
-    """Loading logic for loading documents from s3."""
+    """Loading logic for loading documents from an AWS S3 file."""
 
     def __init__(self, bucket: str, key: str):
-        """Initialize with bucket and key name."""
+        """Initialize with bucket and key name.
+
+        Args:
+            bucket: The name of the S3 bucket.
+            key: The key of the S3 object.
+        """
         self.bucket = bucket
         self.key = key
 
diff --git a/langchain/document_loaders/sitemap.py b/langchain/document_loaders/sitemap.py
index 64e3707a3d..68fe88eefb 100644
--- a/langchain/document_loaders/sitemap.py
+++ b/langchain/document_loaders/sitemap.py
@@ -42,11 +42,12 @@ class SitemapLoader(WebBaseLoader):
                 urls that are parsed and loaded
             parsing_function: Function to parse bs4.Soup output
             blocksize: number of sitemap locations per block
-            blocknum: the number of the block that should be loaded - zero indexed
+            blocknum: the number of the block that should be loaded - zero indexed.
+                Default: 0
             meta_function: Function to parse bs4.Soup output for metadata
                 remember when setting this method to also copy metadata["loc"]
                 to metadata["source"] if you are using this field
-            is_local: whether the sitemap is a local file
+            is_local: whether the sitemap is a local file. Default: False
         """
 
         if blocksize is not None and blocksize < 1:
@@ -72,7 +73,14 @@ class SitemapLoader(WebBaseLoader):
         self.is_local = is_local
 
     def parse_sitemap(self, soup: Any) -> List[dict]:
-        """Parse sitemap xml and load into a list of dicts."""
+        """Parse sitemap xml and load into a list of dicts.
+
+        Args:
+            soup: BeautifulSoup object.
+
+        Returns:
+            List of dicts.
+        """
         els = []
         for url in soup.find_all("url"):
             loc = url.find("loc")
diff --git a/langchain/document_loaders/slack_directory.py b/langchain/document_loaders/slack_directory.py
index 718367c4d4..16aa5b4fc5 100644
--- a/langchain/document_loaders/slack_directory.py
+++ b/langchain/document_loaders/slack_directory.py
@@ -9,7 +9,7 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class SlackDirectoryLoader(BaseLoader):
-    """Loader for loading documents from a Slack directory dump."""
+    """Loads documents from a Slack directory dump."""
 
     def __init__(self, zip_path: str, workspace_url: Optional[str] = None):
         """Initialize the SlackDirectoryLoader.
diff --git a/langchain/document_loaders/snowflake_loader.py b/langchain/document_loaders/snowflake_loader.py
index 59164124dc..50219ce35b 100644
--- a/langchain/document_loaders/snowflake_loader.py
+++ b/langchain/document_loaders/snowflake_loader.py
@@ -41,6 +41,7 @@ class SnowflakeLoader(BaseLoader):
             role: Snowflake role.
             database: Snowflake database
             schema: Snowflake schema
+            parameters: Optional. Parameters to pass to the query.
             page_content_columns: Optional. Columns written to Document `page_content`.
             metadata_columns: Optional. Columns written to Document `metadata`.
         """
@@ -62,7 +63,7 @@ class SnowflakeLoader(BaseLoader):
         try:
             import snowflake.connector
         except ImportError as ex:
-            raise ValueError(
+            raise ImportError(
                 "Could not import snowflake-connector-python package. "
                 "Please install it with `pip install snowflake-connector-python`."
             ) from ex
diff --git a/langchain/document_loaders/spreedly.py b/langchain/document_loaders/spreedly.py
index b471341e79..2ec0cfc4c0 100644
--- a/langchain/document_loaders/spreedly.py
+++ b/langchain/document_loaders/spreedly.py
@@ -23,6 +23,12 @@ class SpreedlyLoader(BaseLoader):
     """Loader that fetches data from Spreedly API."""
 
     def __init__(self, access_token: str, resource: str) -> None:
+        """Initialize with an access token and a resource.
+
+        Args:
+            access_token: The access token.
+            resource: The resource.
+        """
         self.access_token = access_token
         self.resource = resource
         self.headers = {
diff --git a/langchain/document_loaders/srt.py b/langchain/document_loaders/srt.py
index ee26d3230a..c6114beba9 100644
--- a/langchain/document_loaders/srt.py
+++ b/langchain/document_loaders/srt.py
@@ -9,7 +9,7 @@ class SRTLoader(BaseLoader):
     """Loader for .srt (subtitle) files."""
 
     def __init__(self, file_path: str):
-        """Initialize with file path."""
+        """Initialize with a file path."""
         try:
             import pysrt  # noqa:F401
         except ImportError:
diff --git a/langchain/document_loaders/stripe.py b/langchain/document_loaders/stripe.py
index efc55824f6..41f978d194 100644
--- a/langchain/document_loaders/stripe.py
+++ b/langchain/document_loaders/stripe.py
@@ -21,6 +21,12 @@ class StripeLoader(BaseLoader):
     """Loader that fetches data from Stripe."""
 
     def __init__(self, resource: str, access_token: Optional[str] = None) -> None:
+        """Initialize with a resource and an access token.
+
+        Args:
+            resource: The resource.
+            access_token: The access token.
+        """
         self.resource = resource
         access_token = access_token or get_from_env(
             "access_token", "STRIPE_ACCESS_TOKEN"
diff --git a/langchain/document_loaders/telegram.py b/langchain/document_loaders/telegram.py
index 3e4bf0e5ea..88225ecc44 100644
--- a/langchain/document_loaders/telegram.py
+++ b/langchain/document_loaders/telegram.py
@@ -1,4 +1,4 @@
-"""Loader that loads Telegram chat json dump."""
+"""Loads Telegram chat json dump."""
 from __future__ import annotations
 
 import asyncio
@@ -24,10 +24,10 @@ def concatenate_rows(row: dict) -> str:
 
 
 class TelegramChatFileLoader(BaseLoader):
-    """Loader that loads Telegram chat json directory dump."""
+    """Loads Telegram chat json directory dump."""
 
     def __init__(self, path: str):
-        """Initialize with path."""
+        """Initialize with a path."""
         self.file_path = path
 
     def load(self) -> List[Document]:
@@ -79,7 +79,7 @@ def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
 
 
 class TelegramChatApiLoader(BaseLoader):
-    """Loader that loads Telegram chat json directory dump."""
+    """Loads Telegram chat data using the Telegram API."""
 
     def __init__(
         self,
@@ -89,7 +89,16 @@ class TelegramChatApiLoader(BaseLoader):
         username: Optional[str] = None,
         file_path: str = "telegram_data.json",
     ):
-        """Initialize with API parameters."""
+        """Initialize with API parameters.
+
+        Args:
+            chat_entity: The chat entity to fetch data from.
+            api_id: The API ID.
+            api_hash: The API hash.
+            username: The username.
+            file_path: The file path to save the data to. Defaults to
+                "telegram_data.json".
+        """
         self.chat_entity = chat_entity
         self.api_id = api_id
         self.api_hash = api_hash
diff --git a/langchain/document_loaders/tomarkdown.py b/langchain/document_loaders/tomarkdown.py
index a3fbf6f792..00ba512dd2 100644
--- a/langchain/document_loaders/tomarkdown.py
+++ b/langchain/document_loaders/tomarkdown.py
@@ -1,4 +1,4 @@
-"""Loader that loads HTML to markdown using 2markdown."""
+"""Loads HTML to markdown using 2markdown."""
 from __future__ import annotations
 
 from typing import Iterator, List
@@ -10,7 +10,7 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class ToMarkdownLoader(BaseLoader):
-    """Loader that loads HTML to markdown using 2markdown."""
+    """Loads HTML to markdown using 2markdown."""
 
     def __init__(self, url: str, api_key: str):
         """Initialize with url and api key."""
diff --git a/langchain/document_loaders/trello.py b/langchain/document_loaders/trello.py
index 5c24358661..11a59a4f5c 100644
--- a/langchain/document_loaders/trello.py
+++ b/langchain/document_loaders/trello.py
@@ -1,4 +1,4 @@
-"""Loader that loads cards from Trello"""
+"""Loads cards from Trello."""
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple
diff --git a/langchain/document_loaders/whatsapp_chat.py b/langchain/document_loaders/whatsapp_chat.py
index b3022a555b..cad93ac8d0 100644
--- a/langchain/document_loaders/whatsapp_chat.py
+++ b/langchain/document_loaders/whatsapp_chat.py
@@ -12,7 +12,7 @@ def concatenate_rows(date: str, sender: str, text: str) -> str:
 
 
 class WhatsAppChatLoader(BaseLoader):
-    """Loader that loads WhatsApp messages text file."""
+    """Loads WhatsApp messages text file."""
 
     def __init__(self, path: str):
         """Initialize with path."""
diff --git a/langchain/document_loaders/word_document.py b/langchain/document_loaders/word_document.py
index f0272b2efe..3ee91ab741 100644
--- a/langchain/document_loaders/word_document.py
+++ b/langchain/document_loaders/word_document.py
@@ -1,4 +1,4 @@
-"""Loader that loads word documents."""
+"""Loads word documents."""
 import os
 import tempfile
 from abc import ABC
diff --git a/langchain/document_loaders/xml.py b/langchain/document_loaders/xml.py
index 78156ee205..4239a49396 100644
--- a/langchain/document_loaders/xml.py
+++ b/langchain/document_loaders/xml.py
@@ -1,4 +1,4 @@
-"""Loader that loads Microsoft Excel files."""
+"""Loads XML files."""
 from typing import Any, List
 
 from langchain.document_loaders.unstructured import (
diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py
index b828c3b0b7..86d7c42a8e 100644
--- a/langchain/document_loaders/youtube.py
+++ b/langchain/document_loaders/youtube.py
@@ -1,4 +1,4 @@
-"""Loader that loads YouTube transcript."""
+"""Loads YouTube transcript."""
 from __future__ import annotations
 
 import logging
@@ -140,7 +140,7 @@ def _parse_video_id(url: str) -> Optional[str]:
 
 
 class YoutubeLoader(BaseLoader):
-    """Loader that loads Youtube transcripts."""
+    """Loads Youtube transcripts."""
 
     def __init__(
         self,
@@ -252,7 +252,7 @@ class YoutubeLoader(BaseLoader):
 
 @dataclass
 class GoogleApiYoutubeLoader(BaseLoader):
-    """Loader that loads all Videos from a Channel
+    """Loads all Videos from a Channel
 
     To use, you should have the ``googleapiclient,youtube_transcript_api``
     python package installed.