forked from Archives/langchain
Change Data Loader Namespace (#6568)
Description: Update the artifact name of the XML file and the namespaces. Co-authored with @tjaffri. Co-authored-by: Kenzie Mihardja <kenzie@docugami.com>
This commit is contained in:
parent
0673245d0c
commit
b8d78424ab
@@ -243,7 +243,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
|||||||
artifact_url = artifact.get("url")
|
artifact_url = artifact.get("url")
|
||||||
artifact_doc = artifact.get("document")
|
artifact_doc = artifact.get("document")
|
||||||
|
|
||||||
if artifact_name == f"{project_id}.xml" and artifact_url and artifact_doc:
|
if artifact_name == "report-values.xml" and artifact_url and artifact_doc:
|
||||||
doc_id = artifact_doc["id"]
|
doc_id = artifact_doc["id"]
|
||||||
metadata: Dict = {}
|
metadata: Dict = {}
|
||||||
|
|
||||||
@@ -266,11 +266,11 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
|||||||
artifact_tree = etree.parse(io.BytesIO(response.content))
|
artifact_tree = etree.parse(io.BytesIO(response.content))
|
||||||
artifact_root = artifact_tree.getroot()
|
artifact_root = artifact_tree.getroot()
|
||||||
ns = artifact_root.nsmap
|
ns = artifact_root.nsmap
|
||||||
entries = artifact_root.xpath("//wp:Entry", namespaces=ns)
|
entries = artifact_root.xpath("//pr:Entry", namespaces=ns)
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
heading = entry.xpath("./wp:Heading", namespaces=ns)[0].text
|
heading = entry.xpath("./pr:Heading", namespaces=ns)[0].text
|
||||||
value = " ".join(
|
value = " ".join(
|
||||||
entry.xpath("./wp:Value", namespaces=ns)[0].itertext()
|
entry.xpath("./pr:Value", namespaces=ns)[0].itertext()
|
||||||
).strip()
|
).strip()
|
||||||
metadata[heading] = value
|
metadata[heading] = value
|
||||||
per_file_metadata[doc_id] = metadata
|
per_file_metadata[doc_id] = metadata
|
||||||
|
Loading…
Reference in New Issue
Block a user