diff --git a/docs/modules/document_loaders/examples/srt.ipynb b/docs/modules/document_loaders/examples/srt.ipynb
new file mode 100644
index 0000000000..7eeafad618
--- /dev/null
+++ b/docs/modules/document_loaders/examples/srt.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "4bdaea79",
+ "metadata": {},
+ "source": [
+ "# Subtitle Files\n",
+ "This notebook shows how to load data from subtitle (`.srt`) files."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "2cbb7f5c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.document_loaders import SRTLoader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "865d8a14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loader = SRTLoader(\"example_data/Star_Wars_The_Clone_Wars_S06E07_Crisis_at_the_Heart.srt\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "173a9234",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "docs = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "15e00030",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Corruption discovered\\nat the core of the Banking Clan! Reunited, Rush Clovis\\nand Senator A'"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "docs[0].page_content[:100]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b7a8dc4",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py
index b64a5ee3c2..386311f9d8 100644
--- a/langchain/document_loaders/__init__.py
+++ b/langchain/document_loaders/__init__.py
@@ -24,6 +24,7 @@ from langchain.document_loaders.readthedocs import ReadTheDocsLoader
from langchain.document_loaders.roam import RoamLoader
from langchain.document_loaders.s3_directory import S3DirectoryLoader
from langchain.document_loaders.s3_file import S3FileLoader
+from langchain.document_loaders.srt import SRTLoader
from langchain.document_loaders.telegram import TelegramChatLoader
from langchain.document_loaders.text import TextLoader
from langchain.document_loaders.unstructured import UnstructuredFileLoader
@@ -63,4 +64,5 @@ __all__ = [
"OnlinePDFLoader",
"PDFMinerLoader",
"TelegramChatLoader",
+ "SRTLoader",
]
diff --git a/langchain/document_loaders/srt.py b/langchain/document_loaders/srt.py
new file mode 100644
index 0000000000..ce38f1c2f8
--- /dev/null
+++ b/langchain/document_loaders/srt.py
@@ -0,0 +1,28 @@
+"""Loader for .srt (subtitle) files."""
+from typing import List
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+
+
+class SRTLoader(BaseLoader):
+ """Loader for .srt (subtitle) files."""
+
+ def __init__(self, file_path: str):
+ """Initialize with file path."""
+ try:
+ import pysrt # noqa:F401
+ except ImportError:
+ raise ValueError(
+ "package `pysrt` not found, please install it with `pysrt`"
+ )
+ self.file_path = file_path
+
+ def load(self) -> List[Document]:
+ """Load using pysrt file."""
+ import pysrt
+
+ parsed_info = pysrt.open(self.file_path)
+ text = " ".join([t.text for t in parsed_info])
+ metadata = {"source": self.file_path}
+ return [Document(page_content=text, metadata=metadata)]