You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/langchain/document_loaders/xml.py

23 lines
749 B
Python

"""Loader that loads Microsoft Excel files."""
from typing import Any, List
from langchain.document_loaders.unstructured import (
UnstructuredFileLoader,
validate_unstructured_version,
)
class UnstructuredXMLLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load XML files."""
def __init__(
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
):
validate_unstructured_version(min_unstructured_version="0.6.7")
super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
def _get_elements(self) -> List:
from unstructured.partition.xml import partition_xml
return partition_xml(filename=self.file_path, **self.unstructured_kwargs)