From 4806504ebc73c6f311606cdca353708b02e64aea Mon Sep 17 00:00:00 2001 From: Taqi Jaffri Date: Tue, 1 Aug 2023 15:43:26 -0700 Subject: [PATCH] Fixed one last key name --- .../document_loaders/docugami.ipynb | 73 ++++++++----------- .../langchain/document_loaders/docugami.py | 4 +- 2 files changed, 31 insertions(+), 46 deletions(-) diff --git a/docs/extras/integrations/document_loaders/docugami.ipynb b/docs/extras/integrations/document_loaders/docugami.ipynb index fb12a02b3c..346df6096e 100644 --- a/docs/extras/integrations/document_loaders/docugami.ipynb +++ b/docs/extras/integrations/document_loaders/docugami.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 20, "metadata": { "tags": [] }, @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -104,7 +104,7 @@ " Document(page_content='DOCUGAMI INC . : \\n\\n Caleb Divine : \\n\\n Signature: Signature: Name: \\n\\n Jean Paoli Name: Title: \\n\\n CEO Title:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:DocugamiInc/docset:DocugamiInc/xhtml:table', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': '', 'tag': 'table'})]" ] }, - "execution_count": 9, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -150,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -190,21 +190,21 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'query': 'What can tenants do with signage on their properties?',\n", - " 'result': ' Tenants can place or attach signs (digital or otherwise) or other forms of identification to their premises, as long as they receive written permission from the landlord and the signs or other forms of identification conform to all applicable laws and ordinances.',\n", - " 'source_documents': [Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Tenant': 'Truetone Lane LLC', 'id': 'v1bvgaozfkak', 'source': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage'}),\n", - " Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Tenant': 'Truetone Lane LLC', 'id': 'v1bvgaozfkak', 'source': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage'}),\n", + " 'result': ' Tenants can place or attach signs (digital or otherwise) to their properties, after receiving written permission from their landlord. Any signs must conform to all applicable laws, ordinances, etc. governing the same.',\n", + " 'source_documents': [Document(page_content='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', metadata={'Landlord': 'GLORY ROAD LLC', 'Tenant': 'Truetone Lane LLC', 'id': 'g2fvhekmltza', 'source': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOFFICELEASEAGREEMENTThis/docset:ArticleIBasic/docset:ArticleIiiUseAndCareOf/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:NoOtherPurposes/docset:TenantsResponsibility/dg:chunk'}),\n", " Document(page_content='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', metadata={'Landlord': 'GLORY ROAD LLC', 'Tenant': 'Truetone Lane LLC', 'id': 'g2fvhekmltza', 'source': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOFFICELEASEAGREEMENTThis/docset:ArticleIBasic/docset:ArticleIiiUseAndCareOf/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:NoOtherPurposes/docset:TenantsResponsibility/dg:chunk'}),\n", - " Document(page_content='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', metadata={'Landlord': 'GLORY ROAD LLC', 'Tenant': 'Truetone Lane LLC', 'id': 'g2fvhekmltza', 'source': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOFFICELEASEAGREEMENTThis/docset:ArticleIBasic/docset:ArticleIiiUseAndCareOf/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:NoOtherPurposes/docset:TenantsResponsibility/dg:chunk'})]}" + " Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Tenant': 'Truetone Lane LLC', 'id': 'v1bvgaozfkak', 'source': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage'}),\n", + " Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Tenant': 'Truetone Lane LLC', 'id': 'v1bvgaozfkak', 'source': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage'})]}" ] }, - "execution_count": 13, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -227,23 +227,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 27, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query='rentable area' filter=Comparison(comparator=, attribute='Landlord', value='DHA Group') limit=None\n" - ] - }, { "data": { "text/plain": [ - "' 13,500 square feet.'" + "\" I don't know.\"" ] }, - "execution_count": 19, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -262,19 +255,19 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='1.6 Rentable Area of the Premises. 9,753 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'Perry & Blair LLC', 'Tenant': 'Shorebucks LLC', 'id': 'dsyfhh4vpeyf', 'source': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:PerryBlair/docset:PerryBlair/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", - " Document(page_content='1.6 Rentable Area of the Premises. 9,753 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'Perry & Blair LLC', 'Tenant': 'Shorebucks LLC', 'id': 'dsyfhh4vpeyf', 'source': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:PerryBlair/docset:PerryBlair/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", - " Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", - " Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'})]" + "[Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup'}),\n", + " Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup'}),\n", + " Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup'}),\n", + " Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup'})]" ] }, - "execution_count": 15, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -294,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -309,7 +302,7 @@ " 'Tenant': 'Truetone Lane LLC'}" ] }, - "execution_count": 16, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -329,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -368,17 +361,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 31, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/Source/github/docugami.langchain/libs/langchain/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n", - " warnings.warn(\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -390,14 +375,14 @@ "data": { "text/plain": [ "{'query': 'What is rentable area for the property owned by DHA Group?',\n", - " 'result': ' 13,500 square feet.',\n", + " 'result': ' The rentable area for the property owned by DHA Group is 13,500 square feet.',\n", " 'source_documents': [Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", " Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", " Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n", - " Document(page_content='1.11 Percentage Rent . (a) 55 % of Gross Revenue to Landlord until Landlord receives Percentage Rent in an amount equal to the Annual Market Rent Hurdle (as escalated); and', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'GrossRevenue', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent-section/docset:PercentageRent[2]/docset:PercentageRent/docset:GrossRevenue[1]/docset:GrossRevenue'})]}" + " Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'})]}" ] }, - "execution_count": 18, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } diff --git a/libs/langchain/langchain/document_loaders/docugami.py b/libs/langchain/langchain/document_loaders/docugami.py index ede8908d6d..dabe510381 100644 --- a/libs/langchain/langchain/document_loaders/docugami.py +++ b/libs/langchain/langchain/document_loaders/docugami.py @@ -145,8 +145,8 @@ class DocugamiLoader(BaseLoader, BaseModel): """Create a Document from a node and text.""" metadata = { XPATH_KEY: _xpath_for_chunk(node), - DOCUMENT_ID_KEY: document["id"], - DOCUMENT_SOURCE_KEY: document["name"], + DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY], + DOCUMENT_SOURCE_KEY: document[DOCUMENT_SOURCE_KEY], STRUCTURE_KEY: node.attrib.get("structure", ""), TAG_KEY: re.sub(r"\{.*\}", "", node.tag), }