forked from Archives/langchain
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
747 B
Python
23 lines
747 B
Python
"""Loader that loads EPub files."""
|
|
from typing import List
|
|
|
|
from langchain.document_loaders.unstructured import (
|
|
UnstructuredFileLoader,
|
|
satisfies_min_unstructured_version,
|
|
)
|
|
|
|
|
|
class UnstructuredEPubLoader(UnstructuredFileLoader):
|
|
"""Loader that uses unstructured to load epub files."""
|
|
|
|
def _get_elements(self) -> List:
|
|
min_unstructured_version = "0.5.4"
|
|
if not satisfies_min_unstructured_version(min_unstructured_version):
|
|
raise ValueError(
|
|
"Partitioning epub files is only supported in "
|
|
f"unstructured>={min_unstructured_version}."
|
|
)
|
|
from unstructured.partition.epub import partition_epub
|
|
|
|
return partition_epub(filename=self.file_path)
|