diff --git a/docs/extras/modules/data_connection/document_loaders/integrations/example_data/stanley-cups.tsv b/docs/extras/modules/data_connection/document_loaders/integrations/example_data/stanley-cups.tsv
new file mode 100644
index 0000000000..314be466da
--- /dev/null
+++ b/docs/extras/modules/data_connection/document_loaders/integrations/example_data/stanley-cups.tsv
@@ -0,0 +1,5 @@
+Stanley Cups
+Team Location Stanley Cups
+Blues STL 1
+Flyers PHI 2
+Maple Leafs TOR 13
diff --git a/docs/extras/modules/data_connection/document_loaders/integrations/tsv.ipynb b/docs/extras/modules/data_connection/document_loaders/integrations/tsv.ipynb
new file mode 100644
index 0000000000..f959ab6b74
--- /dev/null
+++ b/docs/extras/modules/data_connection/document_loaders/integrations/tsv.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# TSV\n",
+ "\n",
+ ">A [tab-separated values (TSV)](https://en.wikipedia.org/wiki/Tab-separated_values) file is a simple, text-based file format for storing tabular data. Records are separated by newlines, and values within a record are separated by tab characters."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## `UnstructuredTSVLoader`\n",
+ "\n",
+ "You can load the table using the `UnstructuredTSVLoader`. One advantage of using `UnstructuredTSVLoader` is that if you use it in `\"elements\"` mode, an HTML representation of the table will be available in the metadata."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.document_loaders.tsv import UnstructuredTSVLoader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loader = UnstructuredTSVLoader(\n",
+ " file_path=\"example_data/mlb_teams_2012.csv\", mode=\"elements\"\n",
+ ")\n",
+ "docs = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "
\n",
+ " \n",
+ " \n",
+ " Nationals, 81.34, 98 | \n",
+ "
\n",
+ " \n",
+ " Reds, 82.20, 97 | \n",
+ "
\n",
+ " \n",
+ " Yankees, 197.96, 95 | \n",
+ "
\n",
+ " \n",
+ " Giants, 117.62, 94 | \n",
+ "
\n",
+ " \n",
+ " Braves, 83.31, 94 | \n",
+ "
\n",
+ " \n",
+ " Athletics, 55.37, 94 | \n",
+ "
\n",
+ " \n",
+ " Rangers, 120.51, 93 | \n",
+ "
\n",
+ " \n",
+ " Orioles, 81.43, 93 | \n",
+ "
\n",
+ " \n",
+ " Rays, 64.17, 90 | \n",
+ "
\n",
+ " \n",
+ " Angels, 154.49, 89 | \n",
+ "
\n",
+ " \n",
+ " Tigers, 132.30, 88 | \n",
+ "
\n",
+ " \n",
+ " Cardinals, 110.30, 88 | \n",
+ "
\n",
+ " \n",
+ " Dodgers, 95.14, 86 | \n",
+ "
\n",
+ " \n",
+ " White Sox, 96.92, 85 | \n",
+ "
\n",
+ " \n",
+ " Brewers, 97.65, 83 | \n",
+ "
\n",
+ " \n",
+ " Phillies, 174.54, 81 | \n",
+ "
\n",
+ " \n",
+ " Diamondbacks, 74.28, 81 | \n",
+ "
\n",
+ " \n",
+ " Pirates, 63.43, 79 | \n",
+ "
\n",
+ " \n",
+ " Padres, 55.24, 76 | \n",
+ "
\n",
+ " \n",
+ " Mariners, 81.97, 75 | \n",
+ "
\n",
+ " \n",
+ " Mets, 93.35, 74 | \n",
+ "
\n",
+ " \n",
+ " Blue Jays, 75.48, 73 | \n",
+ "
\n",
+ " \n",
+ " Royals, 60.91, 72 | \n",
+ "
\n",
+ " \n",
+ " Marlins, 118.07, 69 | \n",
+ "
\n",
+ " \n",
+ " Red Sox, 173.18, 69 | \n",
+ "
\n",
+ " \n",
+ " Indians, 78.43, 68 | \n",
+ "
\n",
+ " \n",
+ " Twins, 94.08, 66 | \n",
+ "
\n",
+ " \n",
+ " Rockies, 78.06, 64 | \n",
+ "
\n",
+ " \n",
+ " Cubs, 88.19, 61 | \n",
+ "
\n",
+ " \n",
+ " Astros, 60.65, 55 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(docs[0].metadata[\"text_as_html\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py
index b6cba9d6f9..fd32632354 100644
--- a/langchain/document_loaders/__init__.py
+++ b/langchain/document_loaders/__init__.py
@@ -122,6 +122,7 @@ from langchain.document_loaders.text import TextLoader
from langchain.document_loaders.tomarkdown import ToMarkdownLoader
from langchain.document_loaders.toml import TomlLoader
from langchain.document_loaders.trello import TrelloLoader
+from langchain.document_loaders.tsv import UnstructuredTSVLoader
from langchain.document_loaders.twitter import TwitterTweetLoader
from langchain.document_loaders.unstructured import (
UnstructuredAPIFileIOLoader,
@@ -278,6 +279,7 @@ __all__ = [
"UnstructuredPowerPointLoader",
"UnstructuredRSTLoader",
"UnstructuredRTFLoader",
+ "UnstructuredTSVLoader",
"UnstructuredURLLoader",
"UnstructuredWordDocumentLoader",
"UnstructuredXMLLoader",
diff --git a/langchain/document_loaders/csv_loader.py b/langchain/document_loaders/csv_loader.py
index 17bb84df26..9a5289966b 100644
--- a/langchain/document_loaders/csv_loader.py
+++ b/langchain/document_loaders/csv_loader.py
@@ -78,7 +78,21 @@ class CSVLoader(BaseLoader):
class UnstructuredCSVLoader(UnstructuredFileLoader):
- """Loader that uses unstructured to load CSV files."""
+ """Loader that uses unstructured to load CSV files. Like other
+ Unstructured loaders, UnstructuredCSVLoader can be used in both
+ "single" and "elements" mode. If you use the loader in "elements"
+ mode, the CSV file will be a single Unstructured Table element.
+ In that mode, an HTML representation of the table will also be
+ available in the "text_as_html" key in the document
+ metadata.
+
+ Examples
+ --------
+ from langchain.document_loaders.csv_loader import UnstructuredCSVLoader
+
+ loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
+ docs = loader.load()
+ """
def __init__(
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
diff --git a/langchain/document_loaders/excel.py b/langchain/document_loaders/excel.py
index 54e96bf269..946430f0c6 100644
--- a/langchain/document_loaders/excel.py
+++ b/langchain/document_loaders/excel.py
@@ -8,7 +8,21 @@ from langchain.document_loaders.unstructured import (
class UnstructuredExcelLoader(UnstructuredFileLoader):
- """Loader that uses unstructured to load Microsoft Excel files."""
+ """Loader that uses unstructured to load Excel files. Like other
+ Unstructured loaders, UnstructuredExcelLoader can be used in both
+ "single" and "elements" mode. If you use the loader in "elements"
+ mode, each sheet in the Excel file will be an Unstructured Table
+ element. If you use the loader in "elements" mode, an
+ HTML representation of the table will be available in the
+ "text_as_html" key in the document metadata.
+
+ Examples
+ --------
+ from langchain.document_loaders.excel import UnstructuredExcelLoader
+
+ loader = UnstructuredExcelLoader("stanley-cups.xlsx", mode="elements")
+ docs = loader.load()
+ """
def __init__(
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
diff --git a/langchain/document_loaders/tsv.py b/langchain/document_loaders/tsv.py
new file mode 100644
index 0000000000..5a5c7b6d7c
--- /dev/null
+++ b/langchain/document_loaders/tsv.py
@@ -0,0 +1,35 @@
+from typing import Any, List
+
+from langchain.document_loaders.unstructured import (
+ UnstructuredFileLoader,
+ validate_unstructured_version,
+)
+
+
+class UnstructuredTSVLoader(UnstructuredFileLoader):
+ """Loader that uses unstructured to load TSV files.
+
+ Like other Unstructured loaders, UnstructuredTSVLoader can be used in
+ both "single" and "elements" mode. In "elements" mode, the TSV file
+ is loaded as a single Unstructured Table element, and an HTML
+ representation of the table is available under the "text_as_html"
+ key in the document metadata.
+
+ Examples
+ --------
+ from langchain.document_loaders.tsv import UnstructuredTSVLoader
+
+ loader = UnstructuredTSVLoader("stanley-cups.tsv", mode="elements")
+ docs = loader.load()
+ """
+
+ def __init__(
+ self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
+ ):
+ validate_unstructured_version(min_unstructured_version="0.7.6")
+ super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
+
+ def _get_elements(self) -> List:
+ from unstructured.partition.tsv import partition_tsv
+
+ return partition_tsv(filename=self.file_path, **self.unstructured_kwargs)
diff --git a/tests/integration_tests/document_loaders/test_tsv.py b/tests/integration_tests/document_loaders/test_tsv.py
new file mode 100644
index 0000000000..2834fc61c3
--- /dev/null
+++ b/tests/integration_tests/document_loaders/test_tsv.py
@@ -0,0 +1,15 @@
+import os
+from pathlib import Path
+
+from langchain.document_loaders import UnstructuredTSVLoader
+
+EXAMPLE_DIRECTORY = Path(__file__).parent.parent / "examples"
+
+
+def test_unstructured_tsv_loader() -> None:
+    """Check that UnstructuredTSVLoader loads the example TSV as one document."""
+    file_path = os.path.join(EXAMPLE_DIRECTORY, "stanley-cups.tsv")
+    loader = UnstructuredTSVLoader(file_path)  # os.path.join already returns str
+    docs = loader.load()
+
+    assert len(docs) == 1
diff --git a/tests/integration_tests/examples/stanley-cups.tsv b/tests/integration_tests/examples/stanley-cups.tsv
new file mode 100644
index 0000000000..314be466da
--- /dev/null
+++ b/tests/integration_tests/examples/stanley-cups.tsv
@@ -0,0 +1,5 @@
+Stanley Cups
+Team Location Stanley Cups
+Blues STL 1
+Flyers PHI 2
+Maple Leafs TOR 13