CR feedback

This commit is contained in:
Taqi Jaffri 2023-08-19 13:48:15 -07:00
parent 5919c0f4a2
commit 5cd244e9b7
2 changed files with 3 additions and 10 deletions

View File

@@ -19,18 +19,10 @@
"metadata": {
"tags": []
},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"Requirement already satisfied: lxml in /root/Source/github/docugami.langchain/libs/langchain/.venv/lib/python3.9/site-packages (4.9.3)\n"
-]
-}
-],
+"outputs": [],
"source": [
"# You need the lxml package to use the DocugamiLoader\n",
-"!poetry run pip install lxml"
+"!poetry run pip install lxml --quiet"
]
},
{

View File

@@ -147,6 +147,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
metadata = {
XPATH_KEY: _xpath_for_chunk(node),
DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY],
DOCUMENT_NAME_KEY: document[DOCUMENT_NAME_KEY],
DOCUMENT_SOURCE_KEY: document[DOCUMENT_NAME_KEY],
STRUCTURE_KEY: node.attrib.get("structure", ""),
TAG_KEY: re.sub(r"\{.*\}", "", node.tag),