mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
docs: update unstructured docstrings (#7561)
### Summary Updates the docstrings in the Unstructured document loaders to display more useful information on the integrations page.
This commit is contained in:
parent
2c2f0e15a6
commit
c4d53f98dc
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredEPubLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load epub files."""
|
||||
"""UnstructuredEPubLoader uses unstructured to load EPUB files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredEPubLoader
|
||||
|
||||
loader = UnstructuredEPubLoader(
|
||||
"example.epub", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-epub
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
min_unstructured_version = "0.5.4"
|
||||
|
@ -5,7 +5,27 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
|
||||
class UnstructuredHTMLLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load HTML files."""
|
||||
"""UnstructuredHTMLLoader uses unstructured to load HTML files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredHTMLLoader
|
||||
|
||||
loader = UnstructuredHTMLLoader(
|
||||
"example.html", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-html
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.html import partition_html
|
||||
|
@ -5,7 +5,27 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
|
||||
class UnstructuredImageLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load image files, such as PNGs and JPGs."""
|
||||
"""UnstructuredImageLoader uses unstructured to load PNG and JPG files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredImageLoader
|
||||
|
||||
loader = UnstructuredImageLoader(
|
||||
"example.png", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-image
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.image import partition_image
|
||||
|
@ -5,7 +5,27 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
|
||||
class UnstructuredMarkdownLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load markdown files."""
|
||||
"""UnstructuredMarkdownLoader uses unstructured to load markdown files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredMarkdownLoader
|
||||
|
||||
loader = UnstructuredMarkdownLoader(
|
||||
"example.md", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-md
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
|
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredODTLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load OpenOffice ODT files."""
|
||||
"""Loader that uses unstructured to load OpenOffice ODT files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredODTLoader
|
||||
|
||||
loader = UnstructuredODTLoader(
|
||||
"example.odt", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-odt
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
|
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredOrgModeLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load Org-Mode files."""
|
||||
"""Loader that uses unstructured to load Org-Mode files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredOrgModeLoader
|
||||
|
||||
loader = UnstructuredOrgModeLoader(
|
||||
"example.org", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-org
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
|
@ -29,7 +29,27 @@ logger = logging.getLogger(__file__)
|
||||
|
||||
|
||||
class UnstructuredPDFLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load PDF files."""
|
||||
"""Loader that uses unstructured to load PDF files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredPDFLoader
|
||||
|
||||
loader = UnstructuredPDFLoader(
|
||||
"example.pdf", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-pdf
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.pdf import partition_pdf
|
||||
|
@ -6,7 +6,28 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
|
||||
class UnstructuredPowerPointLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load PowerPoint files."""
|
||||
"""Loader that uses unstructured to load PowerPoint files.
|
||||
Works with both .ppt and .pptx files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredPowerPointLoader
|
||||
|
||||
loader = UnstructuredPowerPointLoader(
|
||||
"example.pptx", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-pptx
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
|
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredRSTLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load RST files."""
|
||||
"""Loader that uses unstructured to load RST files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredRSTLoader
|
||||
|
||||
loader = UnstructuredRSTLoader(
|
||||
"example.rst", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-rst
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
|
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredRTFLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load rtf files."""
|
||||
"""Loader that uses unstructured to load RTF files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredRTFLoader
|
||||
|
||||
loader = UnstructuredRTFLoader(
|
||||
"example.rtf", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-rtf
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
|
@ -126,14 +126,12 @@ class UnstructuredFileLoader(UnstructuredBaseLoader):
|
||||
|
||||
Examples
|
||||
--------
|
||||
```python
|
||||
from langchain.document_loaders import UnstructuredFileLoader
|
||||
|
||||
loader = UnstructuredFileLoader(
|
||||
"example.pdf", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
```
|
||||
|
||||
References
|
||||
----------
|
||||
@ -211,7 +209,6 @@ class UnstructuredAPIFileLoader(UnstructuredFileLoader):
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
```python
|
||||
from langchain.document_loaders import UnstructuredAPIFileLoader
|
||||
|
||||
@ -219,7 +216,6 @@ class UnstructuredAPIFileLoader(UnstructuredFileLoader):
|
||||
"example.pdf", mode="elements", strategy="fast", api_key="MY_API_KEY",
|
||||
)
|
||||
docs = loader.load()
|
||||
```
|
||||
|
||||
References
|
||||
----------
|
||||
@ -272,7 +268,6 @@ class UnstructuredFileIOLoader(UnstructuredBaseLoader):
|
||||
|
||||
Examples
|
||||
--------
|
||||
```python
|
||||
from langchain.document_loaders import UnstructuredFileIOLoader
|
||||
|
||||
with open("example.pdf", "rb") as f:
|
||||
@ -280,7 +275,6 @@ class UnstructuredFileIOLoader(UnstructuredBaseLoader):
|
||||
f, mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
```
|
||||
|
||||
|
||||
References
|
||||
@ -324,7 +318,6 @@ class UnstructuredAPIFileIOLoader(UnstructuredFileIOLoader):
|
||||
|
||||
Examples
|
||||
--------
|
||||
```python
|
||||
from langchain.document_loaders import UnstructuredAPIFileIOLoader
|
||||
|
||||
with open("example.pdf", "rb") as f:
|
||||
@ -332,7 +325,6 @@ class UnstructuredAPIFileIOLoader(UnstructuredFileIOLoader):
|
||||
f, mode="elements", strategy="fast", api_key="MY_API_KEY",
|
||||
)
|
||||
docs = loader.load()
|
||||
```
|
||||
|
||||
References
|
||||
----------
|
||||
|
@ -9,7 +9,30 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnstructuredURLLoader(BaseLoader):
|
||||
"""Loader that uses unstructured to load HTML files."""
|
||||
"""Loader that uses Unstructured to load files from remote URLs.
|
||||
Use the unstructured partition function to detect the MIME type
|
||||
and route the file to the appropriate partitioner.
|
||||
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredURLLoader
|
||||
|
||||
loader = UnstructuredURLLoader(
|
||||
urls=["<url-1>", "<url-2>"], mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -65,7 +65,28 @@ class Docx2txtLoader(BaseLoader, ABC):
|
||||
|
||||
|
||||
class UnstructuredWordDocumentLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load word documents."""
|
||||
"""Loader that uses unstructured to load word documents.
|
||||
Works with both .docx and .doc files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
||||
|
||||
loader = UnstructuredWordDocumentLoader(
|
||||
"example.docx", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-docx
|
||||
"""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
|
@ -8,7 +8,27 @@ from langchain.document_loaders.unstructured import (
|
||||
|
||||
|
||||
class UnstructuredXMLLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load XML files."""
|
||||
"""Loader that uses unstructured to load XML files.
|
||||
You can run the loader in one of two modes: "single" and "elements".
|
||||
If you use "single" mode, the document will be returned as a single
|
||||
langchain Document object. If you use "elements" mode, the unstructured
|
||||
library will split the document into elements such as Title and NarrativeText.
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain.document_loaders import UnstructuredXMLLoader
|
||||
|
||||
loader = UnstructuredXMLLoader(
|
||||
"example.xml", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/bricks.html#partition-xml
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
|
Loading…
Reference in New Issue
Block a user