mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Create ArcGISLoader & example notebook (#8873)
- Description: Adds the ArcGISLoader class to `langchain.document_loaders` - Allows users to load data from ArcGIS Online, Portal, and similar - Users can authenticate with `arcgis.gis.GIS` or retrieve public data anonymously - Uses the `arcgis.features.FeatureLayer` class to retrieve the data - Defines the most relevant keyword arguments and accepts `**kwargs` - Dependencies: Using this class requires `arcgis` and, optionally, `bs4.BeautifulSoup`. Tagging maintainers: - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
e21152358a
commit
eaa505fb09
325
docs/extras/integrations/document_loaders/arcgis.ipynb
Normal file
325
docs/extras/integrations/document_loaders/arcgis.ipynb
Normal file
@ -0,0 +1,325 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "62359e08-cf80-4210-a30c-f450000e65b9",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# ArcGISLoader\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates the use of the `langchain.document_loaders.ArcGISLoader` class.\n",
|
||||||
|
"\n",
|
||||||
|
"You will need to install the ArcGIS API for Python `arcgis` and, optionally, `bs4.BeautifulSoup`.\n",
|
||||||
|
"\n",
|
||||||
|
"You can use an `arcgis.gis.GIS` object for authenticated data loading, or leave it blank to access public data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "b782cab5-0584-4e2a-9073-009fb8dc93a3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.document_loaders import ArcGISLoader\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"url = \"https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7\"\n",
|
||||||
|
"\n",
|
||||||
|
"loader = ArcGISLoader(url)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "aa3053cf-4127-43ea-bf56-e378b348091f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: user 4.04 ms, sys: 1.63 ms, total: 5.67 ms\n",
|
||||||
|
"Wall time: 644 ms\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"docs = loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "a2444519-9117-4feb-8bb9-8931ce286fa5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"dict_keys(['url', 'layer_description', 'item_description', 'layer_properties'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"docs[0].metadata.keys()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "6b6e9107-6a80-4ef7-8149-3013faa2de76",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"KeysView({\n",
|
||||||
|
" \"currentVersion\": 10.81,\n",
|
||||||
|
" \"id\": 7,\n",
|
||||||
|
" \"name\": \"Beach Ramps\",\n",
|
||||||
|
" \"type\": \"Feature Layer\",\n",
|
||||||
|
" \"description\": \"\",\n",
|
||||||
|
" \"geometryType\": \"esriGeometryPoint\",\n",
|
||||||
|
" \"sourceSpatialReference\": {\n",
|
||||||
|
" \"wkid\": 2881,\n",
|
||||||
|
" \"latestWkid\": 2881\n",
|
||||||
|
" },\n",
|
||||||
|
" \"copyrightText\": \"\",\n",
|
||||||
|
" \"parentLayer\": null,\n",
|
||||||
|
" \"subLayers\": [],\n",
|
||||||
|
" \"minScale\": 750000,\n",
|
||||||
|
" \"maxScale\": 0,\n",
|
||||||
|
" \"drawingInfo\": {\n",
|
||||||
|
" \"renderer\": {\n",
|
||||||
|
" \"type\": \"simple\",\n",
|
||||||
|
" \"symbol\": {\n",
|
||||||
|
" \"type\": \"esriPMS\",\n",
|
||||||
|
" \"url\": \"9bb2e5ca499bb68aa3ee0d4e1ecc3849\",\n",
|
||||||
|
" \"imageData\": \"iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IB2cksfwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAJJJREFUOI3NkDEKg0AQRZ9kkSnSGBshR7DJqdJYeg7BMpcS0uQWQsqoCLExkcUJzGqT38zw2fcY1rEzbp7vjXz0EXC7gBxs1ABcG/8CYkCcDqwyLqsV+RlV0I/w7PzuJBArr1VB20H58Ls6h+xoFITkTwWpQJX7XSIBAnFwVj7MLAjJV/AC6G3QoAmK+74Lom04THTBEp/HCSc6AAAAAElFTkSuQmCC\",\n",
|
||||||
|
" \"contentType\": \"image/png\",\n",
|
||||||
|
" \"width\": 12,\n",
|
||||||
|
" \"height\": 12,\n",
|
||||||
|
" \"angle\": 0,\n",
|
||||||
|
" \"xoffset\": 0,\n",
|
||||||
|
" \"yoffset\": 0\n",
|
||||||
|
" },\n",
|
||||||
|
" \"label\": \"\",\n",
|
||||||
|
" \"description\": \"\"\n",
|
||||||
|
" },\n",
|
||||||
|
" \"transparency\": 0,\n",
|
||||||
|
" \"labelingInfo\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" \"defaultVisibility\": true,\n",
|
||||||
|
" \"extent\": {\n",
|
||||||
|
" \"xmin\": -81.09480168806815,\n",
|
||||||
|
" \"ymin\": 28.858349245353473,\n",
|
||||||
|
" \"xmax\": -80.77512908572814,\n",
|
||||||
|
" \"ymax\": 29.41078388840041,\n",
|
||||||
|
" \"spatialReference\": {\n",
|
||||||
|
" \"wkid\": 4326,\n",
|
||||||
|
" \"latestWkid\": 4326\n",
|
||||||
|
" }\n",
|
||||||
|
" },\n",
|
||||||
|
" \"hasAttachments\": false,\n",
|
||||||
|
" \"htmlPopupType\": \"esriServerHTMLPopupTypeNone\",\n",
|
||||||
|
" \"displayField\": \"AccessName\",\n",
|
||||||
|
" \"typeIdField\": null,\n",
|
||||||
|
" \"subtypeFieldName\": null,\n",
|
||||||
|
" \"subtypeField\": null,\n",
|
||||||
|
" \"defaultSubtypeCode\": null,\n",
|
||||||
|
" \"fields\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"OBJECTID\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeOID\",\n",
|
||||||
|
" \"alias\": \"OBJECTID\",\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Shape\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||||
|
" \"alias\": \"Shape\",\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"AccessName\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"AccessName\",\n",
|
||||||
|
" \"length\": 40,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"AccessID\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"AccessID\",\n",
|
||||||
|
" \"length\": 50,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"AccessType\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"AccessType\",\n",
|
||||||
|
" \"length\": 25,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"GeneralLoc\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"GeneralLoc\",\n",
|
||||||
|
" \"length\": 100,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"MilePost\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeDouble\",\n",
|
||||||
|
" \"alias\": \"MilePost\",\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"City\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"City\",\n",
|
||||||
|
" \"length\": 50,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"AccessStatus\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"AccessStatus\",\n",
|
||||||
|
" \"length\": 50,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Entry_Date_Time\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeDate\",\n",
|
||||||
|
" \"alias\": \"Entry_Date_Time\",\n",
|
||||||
|
" \"length\": 8,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"DrivingZone\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeString\",\n",
|
||||||
|
" \"alias\": \"DrivingZone\",\n",
|
||||||
|
" \"length\": 50,\n",
|
||||||
|
" \"domain\": null\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"geometryField\": {\n",
|
||||||
|
" \"name\": \"Shape\",\n",
|
||||||
|
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||||
|
" \"alias\": \"Shape\"\n",
|
||||||
|
" },\n",
|
||||||
|
" \"indexes\": null,\n",
|
||||||
|
" \"subtypes\": [],\n",
|
||||||
|
" \"relationships\": [],\n",
|
||||||
|
" \"canModifyLayer\": true,\n",
|
||||||
|
" \"canScaleSymbols\": false,\n",
|
||||||
|
" \"hasLabels\": false,\n",
|
||||||
|
" \"capabilities\": \"Map,Query,Data\",\n",
|
||||||
|
" \"maxRecordCount\": 1000,\n",
|
||||||
|
" \"supportsStatistics\": true,\n",
|
||||||
|
" \"supportsAdvancedQueries\": true,\n",
|
||||||
|
" \"supportedQueryFormats\": \"JSON, geoJSON\",\n",
|
||||||
|
" \"isDataVersioned\": false,\n",
|
||||||
|
" \"ownershipBasedAccessControlForFeatures\": {\n",
|
||||||
|
" \"allowOthersToQuery\": true\n",
|
||||||
|
" },\n",
|
||||||
|
" \"useStandardizedQueries\": true,\n",
|
||||||
|
" \"advancedQueryCapabilities\": {\n",
|
||||||
|
" \"useStandardizedQueries\": true,\n",
|
||||||
|
" \"supportsStatistics\": true,\n",
|
||||||
|
" \"supportsHavingClause\": true,\n",
|
||||||
|
" \"supportsCountDistinct\": true,\n",
|
||||||
|
" \"supportsOrderBy\": true,\n",
|
||||||
|
" \"supportsDistinct\": true,\n",
|
||||||
|
" \"supportsPagination\": true,\n",
|
||||||
|
" \"supportsTrueCurve\": true,\n",
|
||||||
|
" \"supportsReturningQueryExtent\": true,\n",
|
||||||
|
" \"supportsQueryWithDistance\": true,\n",
|
||||||
|
" \"supportsSqlExpression\": true\n",
|
||||||
|
" },\n",
|
||||||
|
" \"supportsDatumTransformation\": true,\n",
|
||||||
|
" \"dateFieldsTimeReference\": null,\n",
|
||||||
|
" \"supportsCoordinatesQuantization\": true\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"docs[0].metadata['layer_properties'].keys()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "1d132b7d-5a13-4d66-98e8-785ffdf87af0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"{\"OBJECTID\": 2, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 7, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 10, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691394892000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 13, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 16, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691395174000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 26, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691394892000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 36, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 40, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691395124000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 41, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691395174000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 50, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 58, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 63, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 68, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||||
|
"{\"OBJECTID\": 92, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 94, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 122, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1691394832000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 125, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 134, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 233, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1691394832000, \"DrivingZone\": \"YES\"}\n",
|
||||||
|
"{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691395124000, \"DrivingZone\": \"YES\"}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for doc in docs:\n",
|
||||||
|
" print(doc.page_content)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -29,6 +29,7 @@ from langchain.document_loaders.airbyte import (
|
|||||||
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
|
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
|
||||||
from langchain.document_loaders.airtable import AirtableLoader
|
from langchain.document_loaders.airtable import AirtableLoader
|
||||||
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
|
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
|
||||||
|
from langchain.document_loaders.arcgis_loader import ArcGISLoader
|
||||||
from langchain.document_loaders.arxiv import ArxivLoader
|
from langchain.document_loaders.arxiv import ArxivLoader
|
||||||
from langchain.document_loaders.async_html import AsyncHtmlLoader
|
from langchain.document_loaders.async_html import AsyncHtmlLoader
|
||||||
from langchain.document_loaders.azlyrics import AZLyricsLoader
|
from langchain.document_loaders.azlyrics import AZLyricsLoader
|
||||||
@ -214,6 +215,7 @@ __all__ = [
|
|||||||
"AirtableLoader",
|
"AirtableLoader",
|
||||||
"AmazonTextractPDFLoader",
|
"AmazonTextractPDFLoader",
|
||||||
"ApifyDatasetLoader",
|
"ApifyDatasetLoader",
|
||||||
|
"ArcGISLoader",
|
||||||
"ArxivLoader",
|
"ArxivLoader",
|
||||||
"AsyncHtmlLoader",
|
"AsyncHtmlLoader",
|
||||||
"AzureBlobStorageContainerLoader",
|
"AzureBlobStorageContainerLoader",
|
||||||
|
129
libs/langchain/langchain/document_loaders/arcgis_loader.py
Normal file
129
libs/langchain/langchain/document_loaders/arcgis_loader.py
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
"""Document Loader for ArcGIS FeatureLayers."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import warnings
|
||||||
|
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import arcgis
|
||||||
|
|
||||||
|
_NOT_PROVIDED = "(Not Provided)"
|
||||||
|
|
||||||
|
|
||||||
|
class ArcGISLoader(BaseLoader):
    """Load records from an ArcGIS FeatureLayer.

    Every feature returned by the layer query becomes one ``Document`` whose
    ``page_content`` is the JSON-encoded attribute dictionary of that feature.
    The layer URL, the layer/item descriptions and the raw layer properties
    are attached to each document as metadata.
    """

    def __init__(
        self,
        layer: Union[str, arcgis.features.FeatureLayer],
        gis: Optional[arcgis.gis.GIS] = None,
        where: str = "1=1",
        out_fields: Optional[Union[List[str], str]] = None,
        return_geometry: bool = False,
        **kwargs: Any,
    ):
        """Create the loader and eagerly read the layer's descriptions.

        Args:
            layer: Either the URL of a feature layer or an already constructed
                ``arcgis.features.FeatureLayer``.
            gis: GIS connection used to look up the backing item. When
                omitted, ``arcgis.gis.GIS()`` is created with no arguments.
            where: Filter expression forwarded to ``FeatureLayer.query``.
            out_fields: Fields to request, as a list of names or a single
                comma-separated string. ``None`` requests ``"*"``.
            return_geometry: Forwarded to ``FeatureLayer.query``.
            **kwargs: Extra keyword arguments forwarded to
                ``FeatureLayer.query``.

        Raises:
            ImportError: If the ``arcgis`` package is not installed.
        """
        try:
            import arcgis
        except ImportError as e:
            raise ImportError(
                "arcgis is required to use the ArcGIS Loader. "
                "Install it with pip or conda."
            ) from e

        # BeautifulSoup is optional: without it descriptions keep their HTML.
        try:
            from bs4 import BeautifulSoup  # type: ignore

            self.BEAUTIFULSOUP = BeautifulSoup
        except ImportError:
            warnings.warn("BeautifulSoup not found. HTML will not be parsed.")
            self.BEAUTIFULSOUP = None

        self.gis = gis or arcgis.gis.GIS()

        if isinstance(layer, str):
            self.url = layer
            self.layer = arcgis.features.FeatureLayer(layer, gis=gis)
        else:
            self.url = layer.url
            self.layer = layer

        self.layer_properties = self._get_layer_properties()

        self.where = where

        if isinstance(out_fields, str):
            self.out_fields = out_fields
        elif out_fields is None:
            self.out_fields = "*"
        else:
            self.out_fields = ",".join(out_fields)

        self.return_geometry = return_geometry
        self.kwargs = kwargs

    def _clean_description(self, raw_desc: Any) -> Any:
        """Strip HTML from a description, falling back to the placeholder.

        Empty or missing descriptions are never handed to BeautifulSoup; they
        are replaced by ``_NOT_PROVIDED``. When BeautifulSoup is unavailable
        the description is returned unparsed.
        """
        if raw_desc and self.BEAUTIFULSOUP:
            raw_desc = self.BEAUTIFULSOUP(raw_desc).text
        return raw_desc or _NOT_PROVIDED

    def _get_layer_properties(self) -> dict:
        """Get the layer properties from the FeatureLayer.

        Returns:
            A dict with the keys ``layer_description``, ``item_description``
            and ``layer_properties`` (the layer's raw ``properties`` object).
        """
        # The module-level ``import arcgis`` only runs under TYPE_CHECKING, so
        # the name must be imported here before it can be used at runtime.
        import arcgis

        props = self.layer.properties

        try:
            lyr_desc = self._clean_description(props["description"])
        except KeyError:
            lyr_desc = _NOT_PROVIDED

        try:
            item_id = props["serviceItemId"]
            # Fall back to the parent service (URL without the trailing
            # "/<layer number>") when the item lookup returns nothing.
            item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
                re.sub(r"/\d+$", "", self.url),
            )
            try:
                raw_desc = item.description
            except AttributeError:
                raw_desc = item.properties.description
            item_desc = self._clean_description(raw_desc)
        except (KeyError, AttributeError):
            # No service item id, or no description exposed by the fallback.
            item_desc = _NOT_PROVIDED

        return {
            "layer_description": lyr_desc,
            "item_description": item_desc,
            "layer_properties": props,
        }

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load records from FeatureLayer.

        Yields:
            One ``Document`` per feature in the query response, with the
            feature's attributes serialized to JSON as ``page_content``.
        """
        query_response = self.layer.query(
            where=self.where,
            out_fields=self.out_fields,
            return_geometry=self.return_geometry,
            return_all_records=True,
            **self.kwargs,
        )

        for feature in query_response:
            yield Document(
                page_content=json.dumps(feature.as_dict["attributes"]),
                metadata={
                    "url": self.url,
                    "layer_description": self.layer_properties["layer_description"],
                    "item_description": self.layer_properties["item_description"],
                    "layer_properties": self.layer_properties["layer_properties"],
                },
            )

    def load(self) -> List[Document]:
        """Load all records from FeatureLayer."""
        return list(self.lazy_load())
|
@ -0,0 +1,47 @@
|
|||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.document_loaders import ArcGISLoader
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def arcgis_mocks(mock_feature_layer, mock_gis):  # type: ignore
    """Patch ``sys.modules`` with ArcGIS stand-ins for the duration of a test.

    ``arcgis`` itself is replaced by a fresh ``MagicMock``; the feature-layer
    and GIS mocks are registered under their dotted names. The original
    ``sys.modules`` contents are restored when the test finishes.
    """
    with patch.dict(
        "sys.modules",
        {
            "arcgis": MagicMock(),
            "arcgis.features.FeatureLayer": mock_feature_layer,
            "arcgis.gis.GIS": mock_gis,
        },
    ):
        yield
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_feature_layer():  # type: ignore
    """Build a fake feature layer exposing one feature and an HTML description.

    The mock provides the three members the loader reads: ``url``,
    ``properties`` and ``query()``, the latter returning a single feature
    whose ``as_dict`` carries one attribute.
    """
    single_feature = MagicMock(as_dict={"attributes": {"field": "value"}})
    layer = MagicMock(
        url="https://example.com/layer_url",
        properties={"description": "<html><body>Some HTML content</body></html>"},
    )
    layer.query.return_value = [single_feature]
    return layer
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_gis():  # type: ignore
    """Build a fake GIS connection whose ``content.get`` returns a described item."""
    described_item = MagicMock(description="Item description")
    connection = MagicMock()
    connection.content.get.return_value = described_item
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def test_lazy_load(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    """Check that ``lazy_load`` yields one fully populated document per feature.

    The layer-description assertion uses a substring match so it holds both
    when BeautifulSoup is installed (HTML stripped) and when it is not (raw
    HTML kept).
    """
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis)

    documents = list(loader.lazy_load())

    assert len(documents) == 1
    document = documents[0]

    # The feature's attribute dict is serialized to JSON as the page content.
    assert document.page_content == '{"field": "value"}'

    assert document.metadata["url"] == "https://example.com/layer_url"
    assert "Some HTML content" in document.metadata["layer_description"]
    # The mocked layer properties carry no "serviceItemId", so the loader
    # falls back to its placeholder for the item description.
    assert document.metadata["item_description"] == "(Not Provided)"
    assert document.metadata["layer_properties"] == mock_feature_layer.properties
|
Loading…
Reference in New Issue
Block a user