Change Data Loader Namespace (#6568)

Description:
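Update the name of the XML artifact that the loader matches
(`report-values.xml` instead of `{project_id}.xml`) and the namespace prefix
used in the XPath queries (`pr` instead of `wp`). Co-authored with @tjaffri.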
Co-authored-by: Kenzie Mihardja <kenzie@docugami.com>
multi_strategy_parser
Kenzie Mihardja 11 months ago committed by GitHub
parent 0673245d0c
commit b8d78424ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -243,7 +243,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
 artifact_url = artifact.get("url")
 artifact_doc = artifact.get("document")
-if artifact_name == f"{project_id}.xml" and artifact_url and artifact_doc:
+if artifact_name == "report-values.xml" and artifact_url and artifact_doc:
 doc_id = artifact_doc["id"]
 metadata: Dict = {}
@@ -266,11 +266,11 @@ class DocugamiLoader(BaseLoader, BaseModel):
 artifact_tree = etree.parse(io.BytesIO(response.content))
 artifact_root = artifact_tree.getroot()
 ns = artifact_root.nsmap
-entries = artifact_root.xpath("//wp:Entry", namespaces=ns)
+entries = artifact_root.xpath("//pr:Entry", namespaces=ns)
 for entry in entries:
-heading = entry.xpath("./wp:Heading", namespaces=ns)[0].text
+heading = entry.xpath("./pr:Heading", namespaces=ns)[0].text
 value = " ".join(
-entry.xpath("./wp:Value", namespaces=ns)[0].itertext()
+entry.xpath("./pr:Value", namespaces=ns)[0].itertext()
 ).strip()
 metadata[heading] = value
 per_file_metadata[doc_id] = metadata

Loading…
Cancel
Save