docstrings: document_loaders consistency 2 (#9148)

This is Part 2. See #9139 (Part 1).
pull/9156/head
Leonid Ganeline 1 year ago committed by GitHub
parent 1b58460fe3
commit 19f504790e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,4 +1,3 @@
"""Loads HuggingFace datasets."""
from typing import Iterator, List, Mapping, Optional, Sequence, Union
from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class HuggingFaceDatasetLoader(BaseLoader):
"""Load Documents from the Hugging Face Hub."""
"""Load from `Hugging Face Hub` datasets."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Loads iFixit data."""
from typing import List, Optional
import requests
@ -11,7 +10,7 @@ IFIXIT_BASE_URL = "https://www.ifixit.com/api/2.0"
class IFixitLoader(BaseLoader):
"""Load iFixit repair guides, device wikis and answers.
"""Load `iFixit` repair guides, device wikis and answers.
iFixit is the largest, open repair community on the web. The site contains nearly
100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is

@ -1,11 +1,10 @@
"""Loads image files."""
from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredImageLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load PNG and JPG files.
"""Load `PNG` and `JPG` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single

@ -1,9 +1,3 @@
"""Loads image captions.
By default, the loader utilizes the pre-trained BLIP image captioning model.
https://huggingface.co/Salesforce/blip-image-captioning-base
"""
from typing import Any, List, Tuple, Union
import requests
@ -13,7 +7,12 @@ from langchain.document_loaders.base import BaseLoader
class ImageCaptionLoader(BaseLoader):
"""Loads the captions of an image"""
"""Load image captions.
By default, the loader utilizes the pre-trained
Salesforce BLIP image captioning model.
https://huggingface.co/Salesforce/blip-image-captioning-base
"""
def __init__(
self,

@ -1,4 +1,3 @@
"""Loads IMSDb."""
from typing import List
from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
class IMSDbLoader(WebBaseLoader):
"""Loads IMSDb webpages."""
"""Load `IMSDb` webpages."""
def load(self) -> List[Document]:
"""Load webpage."""

@ -1,4 +1,3 @@
"""Loader that fetches data from IUGU"""
import json
import urllib.request
from typing import List, Optional
@ -17,7 +16,7 @@ IUGU_ENDPOINTS = {
class IuguLoader(BaseLoader):
"""Loader that fetches data from IUGU."""
"""Load from `IUGU`."""
def __init__(self, resource: str, api_token: Optional[str] = None) -> None:
"""Initialize the IUGU resource.

@ -11,8 +11,7 @@ LINK_NOTE_TEMPLATE = "joplin://x-callback-url/openNote?id={id}"
class JoplinLoader(BaseLoader):
"""
Loader that fetches notes from Joplin.
"""Load notes from `Joplin`.
In order to use this loader, you need to have Joplin running with the
Web Clipper enabled (look for "Web Clipper" in the app settings).

@ -1,4 +1,3 @@
"""Loads data from JSON."""
import json
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
@ -8,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class JSONLoader(BaseLoader):
"""Loads a JSON file using a jq schema.
"""Load a `JSON` file using a `jq` schema.
Example:
[{"text": ...}, {"text": ...}, {"text": ...}] -> schema = .[].text

@ -1,4 +1,3 @@
"""Loads LarkSuite (FeiShu) document json dump."""
import json
import urllib.request
from typing import Any, Iterator, List
@ -8,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class LarkSuiteDocLoader(BaseLoader):
"""Loads LarkSuite (FeiShu) document."""
"""Load from `LarkSuite` (`FeiShu`)."""
def __init__(self, domain: str, access_token: str, document_id: str):
"""Initialize with domain, access_token (tenant / user), and document_id.

@ -1,11 +1,10 @@
"""Loads Markdown files."""
from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredMarkdownLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load markdown files.
"""Load `Markdown` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single

@ -1,4 +1,3 @@
"""Mastodon document loader."""
from __future__ import annotations
import os
@ -23,7 +22,7 @@ def _dependable_mastodon_import() -> mastodon:
class MastodonTootsLoader(BaseLoader):
"""Mastodon toots loader."""
"""Load the `Mastodon` 'toots'."""
def __init__(
self,

@ -8,7 +8,7 @@ from langchain.utilities.max_compute import MaxComputeAPIWrapper
class MaxComputeLoader(BaseLoader):
"""Loads a query result from Alibaba Cloud MaxCompute table into documents."""
"""Load from `Alibaba Cloud MaxCompute` table."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Load Data from a MediaWiki dump xml."""
import logging
from pathlib import Path
from typing import List, Optional, Sequence, Union
@ -10,8 +9,8 @@ logger = logging.getLogger(__name__)
class MWDumpLoader(BaseLoader):
"""
Load MediaWiki dump from XML file
"""Load `MediaWiki` dump from an `XML` file.
Example:
.. code-block:: python

@ -1,5 +1,3 @@
"""Load MHTML files, enriching metadata with page title."""
import email
import logging
from typing import Dict, List, Union
@ -11,7 +9,7 @@ logger = logging.getLogger(__name__)
class MHTMLLoader(BaseLoader):
"""Loader that uses beautiful soup to parse HTML files."""
"""Parse `MHTML` files with `BeautifulSoup`."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Loader that fetches data from Modern Treasury"""
import json
import urllib.request
from base64 import b64encode
@ -27,7 +26,7 @@ incoming_payment_details",
class ModernTreasuryLoader(BaseLoader):
"""Loader that fetches data from Modern Treasury."""
"""Load from `Modern Treasury`."""
def __init__(
self,

@ -9,7 +9,7 @@ logger = logging.getLogger(__name__)
class NewsURLLoader(BaseLoader):
"""Loader that uses newspaper to load news articles from URLs.
"""Load news articles from URLs using `newspaper`.
Args:
urls: URLs to load. Each is loaded into its own document.

@ -70,7 +70,7 @@ def remove_newlines(x: Any) -> Any:
class NotebookLoader(BaseLoader):
"""Loads .ipynb notebook files."""
"""Load `Jupyter notebook` (.ipynb) files."""
def __init__(
self,
@ -80,7 +80,7 @@ class NotebookLoader(BaseLoader):
remove_newline: bool = False,
traceback: bool = False,
):
"""Initialize with path.
"""Initialize with a path.
Args:
path: The path to load the notebook from.

@ -1,4 +1,3 @@
"""Loads Notion directory dump."""
from pathlib import Path
from typing import List
@ -7,7 +6,7 @@ from langchain.document_loaders.base import BaseLoader
class NotionDirectoryLoader(BaseLoader):
"""Loads Notion directory dump."""
"""Load `Notion directory` dump."""
def __init__(self, path: str):
"""Initialize with a file path."""

@ -1,5 +1,3 @@
"""Notion DB loader for langchain"""
from typing import Any, Dict, List, Optional
import requests
@ -14,7 +12,7 @@ BLOCK_URL = NOTION_BASE_URL + "/blocks/{block_id}/children"
class NotionDBLoader(BaseLoader):
"""Notion DB Loader.
"""Load from `Notion DB`.
Reads content from pages within a Notion Database.
Args:

@ -1,4 +1,3 @@
"""Extract text from any file type."""
import json
import uuid
from typing import List
@ -9,7 +8,7 @@ from langchain.tools.nuclia.tool import NucliaUnderstandingAPI
class NucliaLoader(BaseLoader):
"""Extract text from any file type."""
"""Load from any file type using `Nuclia Understanding API`."""
def __init__(self, path: str, nuclia_tool: NucliaUnderstandingAPI):
self.nua = nuclia_tool

@ -7,7 +7,7 @@ from langchain.document_loaders.obs_file import OBSFileLoader
class OBSDirectoryLoader(BaseLoader):
"""Loading logic for loading documents from Huawei OBS."""
"""Load from `Huawei OBS directory`."""
def __init__(
self,

@ -10,7 +10,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class OBSFileLoader(BaseLoader):
"""Loader for Huawei OBS file."""
"""Load from the `Huawei OBS file`."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Loads Obsidian directory dump."""
import re
from pathlib import Path
from typing import List
@ -8,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class ObsidianLoader(BaseLoader):
"""Loads Obsidian files from disk."""
"""Load `Obsidian` files from directory."""
FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)

@ -1,4 +1,3 @@
"""Loads OpenOffice ODT files."""
from typing import Any, List
from langchain.document_loaders.unstructured import (
@ -8,7 +7,8 @@ from langchain.document_loaders.unstructured import (
class UnstructuredODTLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load OpenOffice ODT files.
"""Load `OpenOffice ODT` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured

@ -60,7 +60,7 @@ class _SupportedFileTypes(BaseModel):
class OneDriveLoader(BaseLoader, BaseModel):
"""Loads data from OneDrive."""
"""Load from `Microsoft OneDrive`."""
settings: _OneDriveSettings = Field(default_factory=_OneDriveSettings)
""" The settings for the OneDrive API client."""

@ -16,7 +16,7 @@ CHUNK_SIZE = 1024 * 1024 * 5
class OneDriveFileLoader(BaseLoader, BaseModel):
"""Loads a file from OneDrive."""
"""Load a file from `Microsoft OneDrive`."""
file: File = Field(...)
"""The file to load."""

@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class OpenCityDataLoader(BaseLoader):
"""Loads Open City data."""
"""Load from `Open City`."""
def __init__(self, city_id: str, dataset_id: str, limit: int):
"""Initialize with dataset_id.

@ -1,4 +1,3 @@
"""Loads Org-Mode files."""
from typing import Any, List
from langchain.document_loaders.unstructured import (
@ -8,7 +7,8 @@ from langchain.document_loaders.unstructured import (
class UnstructuredOrgModeLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load Org-Mode files.
"""Load `Org-Mode` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured

@ -1,4 +1,3 @@
"""Loads PDF files."""
import json
import logging
import os
@ -30,7 +29,8 @@ logger = logging.getLogger(__file__)
class UnstructuredPDFLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load PDF files.
"""Load `PDF` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured
@ -59,7 +59,7 @@ class UnstructuredPDFLoader(UnstructuredFileLoader):
class BasePDFLoader(BaseLoader, ABC):
"""Base loader class for PDF files.
"""Base Loader class for `PDF` files.
Defaults to check for local file, but if the file is a web path, it will download it
to a temporary file, use it, then clean up the temporary file after completion
@ -122,7 +122,7 @@ class BasePDFLoader(BaseLoader, ABC):
class OnlinePDFLoader(BasePDFLoader):
"""Loads online PDFs."""
"""Load online `PDF`."""
def load(self) -> List[Document]:
"""Load documents."""
@ -131,7 +131,7 @@ class OnlinePDFLoader(BasePDFLoader):
class PyPDFLoader(BasePDFLoader):
"""Loads a PDF with pypdf and chunks at character level.
"""Load `PDF` using `pypdf` and chunks at character level.
Loader also stores page numbers in metadata.
"""
@ -162,7 +162,7 @@ class PyPDFLoader(BasePDFLoader):
class PyPDFium2Loader(BasePDFLoader):
"""Loads a PDF with pypdfium2 and chunks at character level."""
"""Load `PDF` using `pypdfium2` and chunks at character level."""
def __init__(self, file_path: str):
"""Initialize with a file path."""
@ -182,7 +182,7 @@ class PyPDFium2Loader(BasePDFLoader):
class PyPDFDirectoryLoader(BaseLoader):
"""Loads a directory with PDF files with pypdf and chunks at character level.
"""Load a directory with `PDF` files using `pypdf` and chunks at character level.
Loader also stores page numbers in metadata.
"""
@ -227,7 +227,7 @@ class PyPDFDirectoryLoader(BaseLoader):
class PDFMinerLoader(BasePDFLoader):
"""Loader that uses PDFMiner to load PDF files."""
"""Load `PDF` files using `PDFMiner`."""
def __init__(self, file_path: str) -> None:
"""Initialize with file path."""
@ -255,7 +255,7 @@ class PDFMinerLoader(BasePDFLoader):
class PDFMinerPDFasHTMLLoader(BasePDFLoader):
"""Loader that uses PDFMiner to load PDF files as HTML content."""
"""Load `PDF` files as HTML content using `PDFMiner`."""
def __init__(self, file_path: str):
"""Initialize with a file path."""
@ -289,7 +289,7 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader):
class PyMuPDFLoader(BasePDFLoader):
"""Loader that uses PyMuPDF to load PDF files."""
"""Load `PDF` files using `PyMuPDF`."""
def __init__(self, file_path: str) -> None:
"""Initialize with a file path."""
@ -314,7 +314,7 @@ class PyMuPDFLoader(BasePDFLoader):
# MathpixPDFLoader implementation taken largely from Daniel Gross's:
# https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
class MathpixPDFLoader(BasePDFLoader):
"""This class uses Mathpix service to load PDF files."""
"""Load `PDF` files using `Mathpix` service."""
def __init__(
self,
@ -433,7 +433,7 @@ class MathpixPDFLoader(BasePDFLoader):
class PDFPlumberLoader(BasePDFLoader):
"""Loader that uses pdfplumber to load PDF files."""
"""Load `PDF` files using `pdfplumber`."""
def __init__(
self, file_path: str, text_kwargs: Optional[Mapping[str, Any]] = None
@ -459,7 +459,7 @@ class PDFPlumberLoader(BasePDFLoader):
class AmazonTextractPDFLoader(BasePDFLoader):
"""Loads a PDF document from local file system, HTTP or S3.
"""Load `PDF` files from a local file system, HTTP or S3.
To authenticate, the AWS client uses the following methods to
automatically load credentials:

@ -1,4 +1,3 @@
"""Loads PowerPoint files."""
import os
from typing import List
@ -6,7 +5,8 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredPowerPointLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load PowerPoint files.
"""Load `Microsoft PowerPoint` files using `Unstructured`.
Works with both .ppt and .pptx files.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single

@ -1,4 +1,3 @@
"""Loads documents from Psychic.dev."""
from typing import List, Optional
from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class PsychicLoader(BaseLoader):
"""Loads documents from Psychic.dev."""
"""Load from `Psychic.dev`."""
def __init__(
self, api_key: str, account_id: str, connector_id: Optional[str] = None

@ -6,7 +6,7 @@ from langchain.utilities.pubmed import PubMedAPIWrapper
class PubMedLoader(BaseLoader):
"""Loads a query result from PubMed biomedical library into a list of Documents.
"""Load from the `PubMed` biomedical library.
Attributes:
query: The query to be passed to the PubMed API.

@ -1,4 +1,3 @@
"""Load from a Spark Dataframe object"""
import itertools
import logging
import sys
@ -14,7 +13,7 @@ if TYPE_CHECKING:
class PySparkDataFrameLoader(BaseLoader):
"""Load PySpark DataFrames"""
"""Load `PySpark` DataFrames."""
def __init__(
self,

@ -4,9 +4,7 @@ from langchain.document_loaders.text import TextLoader
class PythonLoader(TextLoader):
"""
Load Python files, respecting any non-default encoding if specified.
"""
"""Load `Python` files, respecting any non-default encoding if specified."""
def __init__(self, file_path: str):
"""Initialize with a file path.

@ -1,4 +1,3 @@
"""Loads ReadTheDocs documentation directory dump."""
from pathlib import Path
from typing import Any, List, Optional, Tuple, Union
@ -7,7 +6,7 @@ from langchain.document_loaders.base import BaseLoader
class ReadTheDocsLoader(BaseLoader):
"""Loads ReadTheDocs documentation directory dump."""
"""Load `ReadTheDocs` documentation directory."""
def __init__(
self,

@ -10,7 +10,7 @@ from langchain.document_loaders.base import BaseLoader
class RecursiveUrlLoader(BaseLoader):
"""Loads all child links from a given url."""
"""Load all child links from a URL page."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Reddit document loader."""
from __future__ import annotations
from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence
@ -21,7 +20,8 @@ def _dependable_praw_import() -> praw:
class RedditPostsLoader(BaseLoader):
"""Reddit posts loader.
"""Load `Reddit` posts.
Read posts on a subreddit.
First, you need to go to
https://www.reddit.com/prefs/apps/

@ -1,4 +1,3 @@
"""Loads Roam directory dump."""
from pathlib import Path
from typing import List
@ -7,7 +6,7 @@ from langchain.document_loaders.base import BaseLoader
class RoamLoader(BaseLoader):
"""Loads Roam files from disk."""
"""Load `Roam` files from a directory."""
def __init__(self, path: str):
"""Initialize with a path."""

@ -17,7 +17,7 @@ class ColumnNotFoundError(Exception):
class RocksetLoader(BaseLoader):
"""Wrapper around Rockset db
"""Load from a `Rockset` database.
To use, you should have the `rockset` python package installed.

@ -1,4 +1,3 @@
"""Loader that uses unstructured to load HTML files."""
import logging
from typing import Any, Iterator, List, Optional, Sequence
@ -10,7 +9,7 @@ logger = logging.getLogger(__name__)
class RSSFeedLoader(BaseLoader):
"""Loader that uses newspaper to load news articles from RSS feeds.
"""Load news articles from `RSS` feeds using `newspaper`.
Args:
urls: URLs for RSS feeds to load. Each article in the feed is loaded into its own document.

@ -8,7 +8,8 @@ from langchain.document_loaders.unstructured import (
class UnstructuredRSTLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load RST files.
"""Load `RST` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured

@ -8,7 +8,8 @@ from langchain.document_loaders.unstructured import (
class UnstructuredRTFLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load RTF files.
"""Load `RTF` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured

@ -1,4 +1,3 @@
"""Loading logic for loading documents from an AWS S3 directory."""
from typing import List
from langchain.docstore.document import Document
@ -7,7 +6,7 @@ from langchain.document_loaders.s3_file import S3FileLoader
class S3DirectoryLoader(BaseLoader):
"""Loading logic for loading documents from an AWS S3."""
"""Load from `Amazon AWS S3` directory."""
def __init__(self, bucket: str, prefix: str = ""):
"""Initialize with bucket and key name.

@ -1,4 +1,3 @@
"""Loading logic for loading documents from an AWS S3 file."""
import os
import tempfile
from typing import List
@ -9,7 +8,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class S3FileLoader(BaseLoader):
"""Loading logic for loading documents from an AWS S3 file."""
"""Load from `Amazon AWS S3` file."""
def __init__(self, bucket: str, key: str):
"""Initialize with bucket and key name.

@ -1,4 +1,3 @@
"""Loader that fetches a sitemap and loads those URLs."""
import itertools
import re
from typing import Any, Callable, Generator, Iterable, List, Optional
@ -22,7 +21,7 @@ def _batch_block(iterable: Iterable, size: int) -> Generator[List[dict], None, N
class SitemapLoader(WebBaseLoader):
"""Loader that fetches a sitemap and loads those URLs."""
"""Load a sitemap and its URLs."""
def __init__(
self,

@ -1,4 +1,3 @@
"""Loader for documents from a Slack export."""
import json
import zipfile
from pathlib import Path
@ -9,7 +8,7 @@ from langchain.document_loaders.base import BaseLoader
class SlackDirectoryLoader(BaseLoader):
"""Loads documents from a Slack directory dump."""
"""Load from a `Slack` directory dump."""
def __init__(self, zip_path: str, workspace_url: Optional[str] = None):
"""Initialize the SlackDirectoryLoader.

@ -7,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class SnowflakeLoader(BaseLoader):
"""Loads a query result from Snowflake into a list of documents.
"""Load from `Snowflake` API.
Each document represents one row of the result. The `page_content_columns`
are written into the `page_content` of the document. The `metadata_columns`

@ -1,4 +1,3 @@
"""Loader that fetches data from Spreedly API."""
import json
import urllib.request
from typing import List
@ -20,7 +19,7 @@ SPREEDLY_ENDPOINTS = {
class SpreedlyLoader(BaseLoader):
"""Loader that fetches data from Spreedly API."""
"""Load from `Spreedly` API."""
def __init__(self, access_token: str, resource: str) -> None:
"""Initialize with an access token and a resource.

@ -1,4 +1,3 @@
"""Loader for .srt (subtitle) files."""
from typing import List
from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class SRTLoader(BaseLoader):
"""Loader for .srt (subtitle) files."""
"""Load `.srt` (subtitle) files."""
def __init__(self, file_path: str):
"""Initialize with a file path."""

@ -1,4 +1,3 @@
"""Loader that fetches data from Stripe"""
import json
import urllib.request
from typing import List, Optional
@ -18,7 +17,7 @@ STRIPE_ENDPOINTS = {
class StripeLoader(BaseLoader):
"""Loader that fetches data from Stripe."""
"""Load from `Stripe` API."""
def __init__(self, resource: str, access_token: Optional[str] = None) -> None:
"""Initialize with a resource and an access token.

@ -1,4 +1,3 @@
"""Loads Telegram chat json dump."""
from __future__ import annotations
import asyncio
@ -24,7 +23,7 @@ def concatenate_rows(row: dict) -> str:
class TelegramChatFileLoader(BaseLoader):
"""Loads Telegram chat json directory dump."""
"""Load from `Telegram chat` dump."""
def __init__(self, path: str):
"""Initialize with a path."""

@ -1,4 +1,3 @@
"""Loading logic for loading documents from Tencent Cloud COS directory."""
from typing import Any, Iterator, List
from langchain.docstore.document import Document
@ -7,7 +6,7 @@ from langchain.document_loaders.tencent_cos_file import TencentCOSFileLoader
class TencentCOSDirectoryLoader(BaseLoader):
"""Loader for Tencent Cloud COS directory."""
"""Load from `Tencent Cloud COS` directory."""
def __init__(self, conf: Any, bucket: str, prefix: str = ""):
"""Initialize with COS config, bucket and prefix.

@ -1,4 +1,3 @@
"""Loading logic for loading documents from Tencent Cloud COS file."""
import os
import tempfile
from typing import Any, Iterator, List
@ -9,7 +8,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class TencentCOSFileLoader(BaseLoader):
"""Loader for Tencent Cloud COS file."""
"""Load from `Tencent Cloud COS` file."""
def __init__(self, conf: Any, bucket: str, key: str):
"""Initialize with COS config, bucket and key name.

@ -6,7 +6,7 @@ from langchain.utilities.tensorflow_datasets import TensorflowDatasets
class TensorflowDatasetLoader(BaseLoader):
"""Loads from TensorFlow Datasets into a list of Documents.
"""Load from `TensorFlow Dataset`.
Attributes:
dataset_name: the name of the dataset to load

@ -9,7 +9,7 @@ logger = logging.getLogger(__name__)
class TextLoader(BaseLoader):
"""Load text files.
"""Load text file.
Args:

@ -1,4 +1,3 @@
"""Loads HTML to markdown using 2markdown."""
from __future__ import annotations
from typing import Iterator, List
@ -10,7 +9,7 @@ from langchain.document_loaders.base import BaseLoader
class ToMarkdownLoader(BaseLoader):
"""Loads HTML to markdown using 2markdown."""
"""Load `HTML` using `2markdown API`."""
def __init__(self, url: str, api_key: str):
"""Initialize with url and api key."""

@ -7,11 +7,10 @@ from langchain.document_loaders.base import BaseLoader
class TomlLoader(BaseLoader):
"""
A TOML document loader that inherits from the BaseLoader class.
"""Load `TOML` files.
This class can be initialized with either a single source file or a source
directory containing TOML files.
It can load a single source file or several files in a single
directory.
"""
def __init__(self, source: Union[str, Path]):

@ -1,4 +1,3 @@
"""Loads cards from Trello"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple
@ -12,7 +11,7 @@ if TYPE_CHECKING:
class TrelloLoader(BaseLoader):
"""Trello loader. Reads all cards from a Trello board."""
"""Load cards from a `Trello` board."""
def __init__(
self,

@ -7,7 +7,9 @@ from langchain.document_loaders.unstructured import (
class UnstructuredTSVLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load TSV files. Like other
"""Load `TSV` files using `Unstructured`.
Like other
Unstructured loaders, UnstructuredTSVLoader can be used in both
"single" and "elements" mode. If you use the loader in "elements"
mode, the TSV file will be a single Unstructured Table element.

@ -1,4 +1,3 @@
"""Twitter document loader."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Union
@ -22,8 +21,9 @@ def _dependable_tweepy_import() -> tweepy:
class TwitterTweetLoader(BaseLoader):
"""Twitter tweets loader.
Read tweets of user twitter handle.
"""Load `Twitter` tweets.
Read tweets of the user's Twitter handle.
First you need to go to
`https://developer.twitter.com/en/docs/twitter-api

@ -130,7 +130,7 @@ class UnstructuredBaseLoader(BaseLoader, ABC):
class UnstructuredFileLoader(UnstructuredBaseLoader):
"""Loader that uses Unstructured to load files.
"""Load files using `Unstructured`.
The file loader uses the
unstructured partition function and will automatically detect the file
@ -211,7 +211,7 @@ def get_elements_from_api(
class UnstructuredAPIFileLoader(UnstructuredFileLoader):
"""Loader that uses the Unstructured API to load files.
"""Load files using `Unstructured` API.
By default, the loader makes a call to the hosted Unstructured API.
If you are running the unstructured API locally, you can change the
@ -275,7 +275,7 @@ class UnstructuredAPIFileLoader(UnstructuredFileLoader):
class UnstructuredFileIOLoader(UnstructuredBaseLoader):
"""Loader that uses Unstructured to load files.
"""Load files using `Unstructured`.
The file loader
uses the unstructured partition function and will automatically detect the file
@ -322,7 +322,7 @@ class UnstructuredFileIOLoader(UnstructuredBaseLoader):
class UnstructuredAPIFileIOLoader(UnstructuredFileIOLoader):
"""Loader that uses the Unstructured API to load files.
"""Load files using `Unstructured` API.
By default, the loader makes a call to the hosted Unstructured API.
If you are running the unstructured API locally, you can change the

@ -9,7 +9,8 @@ logger = logging.getLogger(__name__)
class UnstructuredURLLoader(BaseLoader):
"""Loader that use Unstructured to load files from remote URLs.
"""Load files from remote URLs using `Unstructured`.
Use the unstructured partition function to detect the MIME type
and route the file to the appropriate partitioner.

@ -10,7 +10,8 @@ logger = logging.getLogger(__name__)
class PlaywrightURLLoader(BaseLoader):
"""Loader that uses Playwright and to load a page and unstructured to load the html.
"""Load `HTML` pages with `Playwright` and parse with `Unstructured`.
This is useful for loading pages that require javascript to render.
Attributes:

@ -13,7 +13,8 @@ logger = logging.getLogger(__name__)
class SeleniumURLLoader(BaseLoader):
"""Loader that uses Selenium and to load a page and unstructured to load the html.
"""Load `HTML` pages with `Selenium` and parse with `Unstructured`.
This is useful for loading pages that require javascript to render.
Attributes:

@ -10,7 +10,7 @@ from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
class WeatherDataLoader(BaseLoader):
"""Weather Reader.
"""Load weather data with `Open Weather Map` API.
Reads the forecast & current weather of any location using OpenWeatherMap's free
API. Check out 'https://openweathermap.org/appid' for more on how to generate a free

@ -37,7 +37,7 @@ def _build_metadata(soup: Any, url: str) -> dict:
class WebBaseLoader(BaseLoader):
"""Loader that uses urllib and beautiful soup to load webpages."""
"""Load HTML pages using `urllib` and parse them with `BeautifulSoup`."""
web_paths: List[str]

@ -12,7 +12,7 @@ def concatenate_rows(date: str, sender: str, text: str) -> str:
class WhatsAppChatLoader(BaseLoader):
"""Loads WhatsApp messages text file."""
"""Load `WhatsApp` messages text file."""
def __init__(self, path: str):
"""Initialize with path."""

@ -6,7 +6,8 @@ from langchain.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaLoader(BaseLoader):
"""Loads a query result from www.wikipedia.org into a list of Documents.
"""Load from `Wikipedia`.
The hard limit on the number of downloaded Documents is 300 for now.
Each wiki page represents one Document.

@ -13,7 +13,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class Docx2txtLoader(BaseLoader, ABC):
"""Loads a DOCX with docx2txt and chunks at character level.
"""Load `DOCX` file using `docx2txt` and chunks at character level.
Defaults to check for local file, but if the file is a web path, it will download it
to a temporary file, and use that, then clean up the temporary file after completion
@ -65,7 +65,8 @@ class Docx2txtLoader(BaseLoader, ABC):
class UnstructuredWordDocumentLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load word documents.
"""Load `Microsoft Word` file using `Unstructured`.
Works with both .docx and .doc files.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single

@ -8,7 +8,8 @@ from langchain.document_loaders.unstructured import (
class UnstructuredXMLLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load XML files.
"""Load `XML` file using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single
langchain Document object. If you use "elements" mode, the unstructured

@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class XorbitsLoader(BaseLoader):
"""Load Xorbits DataFrame."""
"""Load `Xorbits` DataFrame."""
def __init__(self, data_frame: Any, page_content_column: str = "text"):
"""Initialize with dataframe object.

@ -140,7 +140,7 @@ def _parse_video_id(url: str) -> Optional[str]:
class YoutubeLoader(BaseLoader):
"""Loads Youtube transcripts."""
"""Load `YouTube` transcripts."""
def __init__(
self,
@ -252,7 +252,7 @@ class YoutubeLoader(BaseLoader):
@dataclass
class GoogleApiYoutubeLoader(BaseLoader):
"""Loads all Videos from a Channel
"""Load all Videos from a `YouTube` Channel.
To use, you should have the ``googleapiclient,youtube_transcript_api``
python package installed.

Loading…
Cancel
Save