mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
e4224a396b
# Unstructured XML Loader

Adds an `UnstructuredXMLLoader` class for `.xml` files. Works with `unstructured>=0.6.7`. A plain text representation of the text with the XML tags will be available under the `page_content` attribute in the doc.

### Testing

```python
from langchain.document_loaders import UnstructuredXMLLoader

loader = UnstructuredXMLLoader(
    "example_data/factbook.xml",
)
docs = loader.load()
```

## Who can review?

@hwchase17 @eyurtsev
16 lines
421 B
Python
16 lines
421 B
Python
import os
|
|
from pathlib import Path
|
|
|
|
from langchain.document_loaders import UnstructuredXMLLoader
|
|
|
|
# Directory of example documents: the "examples" folder located two levels
# up from this file (i.e. next to this file's parent directory).
EXAMPLE_DIRECTORY = Path(__file__).parent.parent.joinpath("examples")
# Module-level alias bound to the very same Path object.
file_path = EXAMPLE_DIRECTORY
|
|
|
|
|
|
def test_unstructured_xml_loader() -> None:
    """Load the ``factbook.xml`` example and expect exactly one document."""
    # The loader is handed a plain string path, built from the examples folder.
    xml_path = str(EXAMPLE_DIRECTORY / "factbook.xml")
    documents = UnstructuredXMLLoader(xml_path).load()

    assert len(documents) == 1
|