mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
e4224a396b
# Unstructured XML Loader

Adds an `UnstructuredXMLLoader` class for `.xml` files. Works with `unstructured>=0.6.7`. A plain text representation of the text with the XML tags will be available under the `page_content` attribute in the doc.

### Testing

```python
from langchain.document_loaders import UnstructuredXMLLoader

loader = UnstructuredXMLLoader(
    "example_data/factbook.xml",
)
docs = loader.load()
```

## Who can review?

@hwchase17 @eyurtsev
28 lines
669 B
XML
28 lines
669 B
XML
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample factbook data: one <country> element per entry, each holding a
  <name>, <capital>, <leader>, and <sport> child.
  Literal ampersands in text are written as the &amp; entity so the
  document stays well-formed.
-->
<factbook>
  <country>
    <name>United States</name>
    <capital>Washington, DC</capital>
    <leader>Joe Biden</leader>
    <sport>Baseball</sport>
  </country>
  <country>
    <name>Canada</name>
    <capital>Ottawa</capital>
    <leader>Justin Trudeau</leader>
    <sport>Hockey</sport>
  </country>
  <country>
    <name>France</name>
    <capital>Paris</capital>
    <leader>Emmanuel Macron</leader>
    <sport>Soccer</sport>
  </country>
  <country>
    <name>Trinidad &amp; Tobago</name>
    <capital>Port of Spain</capital>
    <leader>Keith Rowley</leader>
    <sport>Track &amp; Field</sport>
  </country>
</factbook>