From f7ad14acfa234d3345caa6a7c6ecb1c04b12d247 Mon Sep 17 00:00:00 2001 From: glaze Date: Tue, 25 Jul 2023 08:09:16 +0800 Subject: [PATCH] Add etherscan document loader (#7943) @rlancemartin The modification includes: * etherscanLoader * test_etherscan * document ipynb I have run the test, lint, format, and spell check. I do encounter a linting error on ipynb, I am not sure how to address that. ``` docs/extras/modules/data_connection/document_loaders/integrations/Etherscan.ipynb:55: error: Name "null" is not defined [name-defined] docs/extras/modules/data_connection/document_loaders/integrations/Etherscan.ipynb:76: error: Name "null" is not defined [name-defined] Found 2 errors in 1 file (checked 1 source file) ``` - Description: The Etherscan loader uses etherscan api to load transaction histories under specific accounts on Ethereum Mainnet. - No dependency is introduced by this PR. - Twitter handle: glazecl --------- Co-authored-by: Bagatur --- .../document_loaders/Etherscan.ipynb | 220 ++++++++++++++++++ .../langchain/document_loaders/__init__.py | 2 + .../langchain/document_loaders/etherscan.py | 204 ++++++++++++++++ .../document_loaders/test_etherscan.py | 68 ++++++ 4 files changed, 494 insertions(+) create mode 100644 docs/extras/integrations/document_loaders/Etherscan.ipynb create mode 100644 libs/langchain/langchain/document_loaders/etherscan.py create mode 100644 libs/langchain/tests/integration_tests/document_loaders/test_etherscan.py diff --git a/docs/extras/integrations/document_loaders/Etherscan.ipynb b/docs/extras/integrations/document_loaders/Etherscan.ipynb new file mode 100644 index 0000000000..5edf001568 --- /dev/null +++ b/docs/extras/integrations/document_loaders/Etherscan.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1ab83660", + "metadata": {}, + "source": [ + "# Etherscan Loader\n", + "## Overview\n", + "\n", + "The Etherscan loader use etherscan api to load transacactions histories under specific account on Ethereum Mainnet.\n", + "\n", + "You will need a Etherscan api key to proceed. The free api key has 5 calls per seconds quota.\n", + "\n", + "The loader supports the following six functinalities:\n", + "* Retrieve normal transactions under specifc account on Ethereum Mainet\n", + "* Retrieve internal transactions under specifc account on Ethereum Mainet\n", + "* Retrieve erc20 transactions under specifc account on Ethereum Mainet\n", + "* Retrieve erc721 transactions under specifc account on Ethereum Mainet\n", + "* Retrieve erc1155 transactions under specifc account on Ethereum Mainet\n", + "* Retrieve ethereum balance in wei under specifc account on Ethereum Mainet\n", + "\n", + "\n", + "If the account does not have corresponding transactions, the loader will a list with one document. The content of document is ''.\n", + "\n", + "You can pass differnt filters to loader to access different functionalities we mentioned above:\n", + "* \"normal_transaction\"\n", + "* \"internal_transaction\"\n", + "* \"erc20_transaction\"\n", + "* \"eth_balance\"\n", + "* \"erc721_transaction\"\n", + "* \"erc1155_transaction\"\n", + "The filter is default to normal_transaction\n", + "\n", + "If you have any questions, you can access [Etherscan API Doc](https://etherscan.io/tx/0x0ffa32c787b1398f44303f731cb06678e086e4f82ce07cebf75e99bb7c079c77) or contact me via i@inevitable.tech.\n", + "\n", + "All functions related to transactions histories are restricted 1000 histories maximum because of Etherscan limit. You can use the following parameters to find the transaction histories you need:\n", + "* offset: default to 20. Shows 20 transactions for one time\n", + "* page: default to 1. This controls pagenation.\n", + "* start_block: Default to 0. The transaction histories starts from 0 block.\n", + "* end_block: Default to 99999999. The transaction histories starts from 99999999 block\n", + "* sort: \"desc\" or \"asc\". Set default to \"desc\" to get latest transactions." + ] + }, + { + "cell_type": "markdown", + "id": "d72d4e22", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2911e51e", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install langchain -q" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "208e2fbf", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import EtherscanLoader\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5d24b650", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"ETHERSCAN_API_KEY\"] = etherscanAPIKey" + ] + }, + { + "cell_type": "markdown", + "id": "3bcbb63e", + "metadata": {}, + "source": [ + "# Create a ERC20 transaction loader" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d525e6c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'blockNumber': '13242975',\n", + " 'timeStamp': '1631878751',\n", + " 'hash': '0x366dda325b1a6570928873665b6b418874a7dedf7fee9426158fa3536b621788',\n", + " 'nonce': '28',\n", + " 'blockHash': '0x5469dba1b1e1372962cf2be27ab2640701f88c00640c4d26b8cc2ae9ac256fb6',\n", + " 'from': '0x2ceee24f8d03fc25648c68c8e6569aa0512f6ac3',\n", + " 'contractAddress': '0x2ceee24f8d03fc25648c68c8e6569aa0512f6ac3',\n", + " 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b',\n", + " 'value': '298131000000000',\n", + " 'tokenName': 'ABCHANGE.io',\n", + " 'tokenSymbol': 'XCH',\n", + " 'tokenDecimal': '9',\n", + " 'transactionIndex': '71',\n", + " 'gas': '15000000',\n", + " 'gasPrice': '48614996176',\n", + " 'gasUsed': '5712724',\n", + " 'cumulativeGasUsed': '11507920',\n", + " 'input': 'deprecated',\n", + " 'confirmations': '4492277'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "account_address = \"0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b\"\n", + "loader = EtherscanLoader(account_address, filter=\"erc20_transaction\")\n", + "result = loader.load()\n", + "eval(result[0].page_content)" + ] + }, + { + "cell_type": "markdown", + "id": "2a1ecce0", + "metadata": {}, + "source": [ + "# Create a normal transaction loader with customized parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "07aa2b6c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content=\"{'blockNumber': '1723771', 'timeStamp': '1466213371', 'hash': '0xe00abf5fa83a4b23ee1cc7f07f9dda04ab5fa5efe358b315df8b76699a83efc4', 'nonce': '3155', 'blockHash': '0xc2c2207bcaf341eed07f984c9a90b3f8e8bdbdbd2ac6562f8c2f5bfa4b51299d', 'transactionIndex': '5', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '13149213761000000000', 'gas': '90000', 'gasPrice': '22655598156', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '126000', 'gasUsed': '21000', 'confirmations': '16011481', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xe00abf5fa83a4b23ee1cc7f07f9dda04ab5fa5efe358b315df8b76699a83efc4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1727090', 'timeStamp': '1466262018', 'hash': '0xd5a779346d499aa722f72ffe7cd3c8594a9ddd91eb7e439e8ba92ceb7bc86928', 'nonce': '3267', 'blockHash': '0xc0cff378c3446b9b22d217c2c5f54b1c85b89a632c69c55b76cdffe88d2b9f4d', 'transactionIndex': '20', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11521979886000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '3806725', 'gasUsed': '21000', 'confirmations': '16008162', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xd5a779346d499aa722f72ffe7cd3c8594a9ddd91eb7e439e8ba92ceb7bc86928', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1730337', 'timeStamp': '1466308222', 'hash': '0xceaffdb3766d2741057d402738eb41e1d1941939d9d438c102fb981fd47a87a4', 'nonce': '3344', 'blockHash': '0x3a52d28b8587d55c621144a161a0ad5c37dd9f7d63b629ab31da04fa410b2cfa', 'transactionIndex': '1', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9783400526000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '60788', 'gasUsed': '21000', 'confirmations': '16004915', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xceaffdb3766d2741057d402738eb41e1d1941939d9d438c102fb981fd47a87a4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1733479', 'timeStamp': '1466352351', 'hash': '0x720d79bf78775f82b40280aae5abfc347643c5f6708d4bf4ec24d65cd01c7121', 'nonce': '3367', 'blockHash': '0x9928661e7ae125b3ae0bcf5e076555a3ee44c52ae31bd6864c9c93a6ebb3f43e', 'transactionIndex': '0', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '1570706444000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '16001773', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0x720d79bf78775f82b40280aae5abfc347643c5f6708d4bf4ec24d65cd01c7121', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1734172', 'timeStamp': '1466362463', 'hash': '0x7a062d25b83bafc9fe6b22bc6f5718bca333908b148676e1ac66c0adeccef647', 'nonce': '1016', 'blockHash': '0x8a8afe2b446713db88218553cfb5dd202422928e5e0bc00475ed2f37d95649de', 'transactionIndex': '4', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '6322276709000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '105333', 'gasUsed': '21000', 'confirmations': '16001080', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x7a062d25b83bafc9fe6b22bc6f5718bca333908b148676e1ac66c0adeccef647', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1737276', 'timeStamp': '1466406037', 'hash': '0xa4e89bfaf075abbf48f96700979e6c7e11a776b9040113ba64ef9c29ac62b19b', 'nonce': '1024', 'blockHash': '0xe117cad73752bb485c3bef24556e45b7766b283229180fcabc9711f3524b9f79', 'transactionIndex': '35', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9976891868000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '3187163', 'gasUsed': '21000', 'confirmations': '15997976', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xa4e89bfaf075abbf48f96700979e6c7e11a776b9040113ba64ef9c29ac62b19b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1740314', 'timeStamp': '1466450262', 'hash': '0x6e1a22dcc6e2c77a9451426fb49e765c3c459dae88350e3ca504f4831ec20e8a', 'nonce': '1051', 'blockHash': '0x588d17842819a81afae3ac6644d8005c12ce55ddb66c8d4c202caa91d4e8fdbe', 'transactionIndex': '6', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8060633765000000000', 'gas': '90000', 'gasPrice': '22926905859', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '153077', 'gasUsed': '21000', 'confirmations': '15994938', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x6e1a22dcc6e2c77a9451426fb49e765c3c459dae88350e3ca504f4831ec20e8a', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1743384', 'timeStamp': '1466494099', 'hash': '0xdbfcc15f02269fc3ae27f69e344a1ac4e08948b12b76ebdd78a64d8cafd511ef', 'nonce': '1068', 'blockHash': '0x997245108c84250057fda27306b53f9438ad40978a95ca51d8fd7477e73fbaa7', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9541921352000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '119650', 'gasUsed': '21000', 'confirmations': '15991868', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xdbfcc15f02269fc3ae27f69e344a1ac4e08948b12b76ebdd78a64d8cafd511ef', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1746405', 'timeStamp': '1466538123', 'hash': '0xbd4f9602f7fff4b8cc2ab6286efdb85f97fa114a43f6df4e6abc88e85b89e97b', 'nonce': '1092', 'blockHash': '0x3af3966cdaf22e8b112792ee2e0edd21ceb5a0e7bf9d8c168a40cf22deb3690c', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8433783799000000000', 'gas': '90000', 'gasPrice': '25689279306', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15988847', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xbd4f9602f7fff4b8cc2ab6286efdb85f97fa114a43f6df4e6abc88e85b89e97b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1749459', 'timeStamp': '1466582044', 'hash': '0x28c327f462cc5013d81c8682c032f014083c6891938a7bdeee85a1c02c3e9ed4', 'nonce': '1096', 'blockHash': '0x5fc5d2a903977b35ce1239975ae23f9157d45d7bd8a8f6205e8ce270000797f9', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10269065805000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15985793', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x28c327f462cc5013d81c8682c032f014083c6891938a7bdeee85a1c02c3e9ed4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1752614', 'timeStamp': '1466626168', 'hash': '0xc3849e550ca5276d7b3c51fa95ad3ae62c1c164799d33f4388fe60c4e1d4f7d8', 'nonce': '1118', 'blockHash': '0x88ef054b98e47504332609394e15c0a4467f84042396717af6483f0bcd916127', 'transactionIndex': '11', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11325836780000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '252000', 'gasUsed': '21000', 'confirmations': '15982638', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xc3849e550ca5276d7b3c51fa95ad3ae62c1c164799d33f4388fe60c4e1d4f7d8', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1755659', 'timeStamp': '1466669931', 'hash': '0xb9f891b7c3d00fcd64483189890591d2b7b910eda6172e3bf3973c5fd3d5a5ae', 'nonce': '1133', 'blockHash': '0x2983972217a91343860415d1744c2a55246a297c4810908bbd3184785bc9b0c2', 'transactionIndex': '14', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '13226475343000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '2674679', 'gasUsed': '21000', 'confirmations': '15979593', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xb9f891b7c3d00fcd64483189890591d2b7b910eda6172e3bf3973c5fd3d5a5ae', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1758709', 'timeStamp': '1466713652', 'hash': '0xd6cce5b184dc7fce85f305ee832df647a9c4640b68e9b79b6f74dc38336d5622', 'nonce': '1147', 'blockHash': '0x1660de1e73067251be0109d267a21ffc7d5bde21719a3664c7045c32e771ecf9', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9758447294000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15976543', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xd6cce5b184dc7fce85f305ee832df647a9c4640b68e9b79b6f74dc38336d5622', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1761783', 'timeStamp': '1466757809', 'hash': '0xd01545872629956867cbd65fdf5e97d0dde1a112c12e76a1bfc92048d37f650f', 'nonce': '1169', 'blockHash': '0x7576961afa4218a3264addd37a41f55c444dd534e9410dbd6f93f7fe20e0363e', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10197126683000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '63000', 'gasUsed': '21000', 'confirmations': '15973469', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xd01545872629956867cbd65fdf5e97d0dde1a112c12e76a1bfc92048d37f650f', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1764895', 'timeStamp': '1466801683', 'hash': '0x620b91b12af7aac75553b47f15742e2825ea38919cfc8082c0666f404a0db28b', 'nonce': '1186', 'blockHash': '0x2e687643becd3c36e0c396a02af0842775e17ccefa0904de5aeca0a9a1aa795e', 'transactionIndex': '7', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8690241462000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '168000', 'gasUsed': '21000', 'confirmations': '15970357', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x620b91b12af7aac75553b47f15742e2825ea38919cfc8082c0666f404a0db28b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1767936', 'timeStamp': '1466845682', 'hash': '0x758efa27576cd17ebe7b842db4892eac6609e3962a4f9f57b7c84b7b1909512f', 'nonce': '1211', 'blockHash': '0xb01d8fd47b3554a99352ac3e5baf5524f314cfbc4262afcfbea1467b2d682898', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11914401843000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15967316', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x758efa27576cd17ebe7b842db4892eac6609e3962a4f9f57b7c84b7b1909512f', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1770911', 'timeStamp': '1466888890', 'hash': '0x9d84470b54ab44b9074b108a0e506cd8badf30457d221e595bb68d63e926b865', 'nonce': '1212', 'blockHash': '0x79a9de39276132dab8bf00dc3e060f0e8a14f5e16a0ee4e9cc491da31b25fe58', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10918214730000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15964341', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x9d84470b54ab44b9074b108a0e506cd8badf30457d221e595bb68d63e926b865', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1774044', 'timeStamp': '1466932983', 'hash': '0x958d85270b58b80f1ad228f716bbac8dd9da7c5f239e9f30d8edeb5bb9301d20', 'nonce': '1240', 'blockHash': '0x69cee390378c3b886f9543fb3a1cb2fc97621ec155f7884564d4c866348ce539', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9979637283000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '63000', 'gasUsed': '21000', 'confirmations': '15961208', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x958d85270b58b80f1ad228f716bbac8dd9da7c5f239e9f30d8edeb5bb9301d20', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1777057', 'timeStamp': '1466976422', 'hash': '0xe76ca3603d2f4e7134bdd7a1c3fd553025fc0b793f3fd2a75cd206b8049e74ab', 'nonce': '1248', 'blockHash': '0xc7cacda0ac38c99f1b9bccbeee1562a41781d2cfaa357e8c7b4af6a49584b968', 'transactionIndex': '7', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '4556173496000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '168000', 'gasUsed': '21000', 'confirmations': '15958195', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xe76ca3603d2f4e7134bdd7a1c3fd553025fc0b793f3fd2a75cd206b8049e74ab', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n", + " Document(page_content=\"{'blockNumber': '1780120', 'timeStamp': '1467020353', 'hash': '0xc5ec8cecdc9f5ed55a5b8b0ad79c964fb5c49dc1136b6a49e981616c3e70bbe6', 'nonce': '1266', 'blockHash': '0xfc0e066e5b613239e1a01e6d582e7ab162ceb3ca4f719dfbd1a0c965adcfe1c5', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11890330240000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15955132', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xc5ec8cecdc9f5ed55a5b8b0ad79c964fb5c49dc1136b6a49e981616c3e70bbe6', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'})]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader = EtherscanLoader(\n", + " account_address,\n", + " page=2,\n", + " offset=20,\n", + " start_block=10000,\n", + " end_block=8888888888,\n", + " sort=\"asc\",\n", + ")\n", + "result = loader.load()\n", + "result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/document_loaders/__init__.py b/libs/langchain/langchain/document_loaders/__init__.py index 495f1644c2..6227d33064 100644 --- a/libs/langchain/langchain/document_loaders/__init__.py +++ b/libs/langchain/langchain/document_loaders/__init__.py @@ -45,6 +45,7 @@ from langchain.document_loaders.email import ( ) from langchain.document_loaders.embaas import EmbaasBlobLoader, EmbaasLoader from langchain.document_loaders.epub import UnstructuredEPubLoader +from langchain.document_loaders.etherscan import EtherscanLoader from langchain.document_loaders.evernote import EverNoteLoader from langchain.document_loaders.excel import UnstructuredExcelLoader from langchain.document_loaders.facebook_chat import FacebookChatLoader @@ -196,6 +197,7 @@ __all__ = [ "DuckDBLoader", "EmbaasBlobLoader", "EmbaasLoader", + "EtherscanLoader", "EverNoteLoader", "FacebookChatLoader", "FaunaLoader", diff --git a/libs/langchain/langchain/document_loaders/etherscan.py b/libs/langchain/langchain/document_loaders/etherscan.py new file mode 100644 index 0000000000..d3fa00368d --- /dev/null +++ b/libs/langchain/langchain/document_loaders/etherscan.py @@ -0,0 +1,204 @@ +import os +import re +from typing import Iterator, List + +import requests + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class EtherscanLoader(BaseLoader): + """ + Load transactions from an account on Ethereum mainnet. + + The Loader use Etherscan API to interact with Ethereum mainnet. + + ETHERSCAN_API_KEY environment variable must be set use this loader. + """ + + def __init__( + self, + account_address: str, + api_key: str = "docs-demo", + filter: str = "normal_transaction", + page: int = 1, + offset: int = 10, + start_block: int = 0, + end_block: int = 99999999, + sort: str = "desc", + ): + self.account_address = account_address + self.api_key = os.environ.get("ETHERSCAN_API_KEY") or api_key + self.filter = filter + self.page = page + self.offset = offset + self.start_block = start_block + self.end_block = end_block + self.sort = sort + + if not self.api_key: + raise ValueError("Etherscan API key not provided") + + if not re.match(r"^0x[a-fA-F0-9]{40}$", self.account_address): + raise ValueError(f"Invalid contract address {self.account_address}") + if filter not in [ + "normal_transaction", + "internal_transaction", + "erc20_transaction", + "eth_balance", + "erc721_transaction", + "erc1155_transaction", + ]: + raise ValueError(f"Invalid filter {filter}") + + def lazy_load(self) -> Iterator[Document]: + """Lazy load Documents from table.""" + result = [] + if self.filter == "normal_transaction": + result = self.getNormTx() + elif self.filter == "internal_transaction": + result = self.getInternalTx() + elif self.filter == "erc20_transaction": + result = self.getERC20Tx() + elif self.filter == "eth_balance": + result = self.getEthBalance() + elif self.filter == "erc721_transaction": + result = self.getERC721Tx() + elif self.filter == "erc1155_transaction": + result = self.getERC1155Tx() + else: + raise ValueError(f"Invalid filter {filter}") + for doc in result: + yield doc + + def load(self) -> List[Document]: + """Load transactions from spcifc account by Etherscan.""" + return list(self.lazy_load()) + + def getNormTx(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=txlist&address={self.account_address}" + f"&startblock={self.start_block}&endblock={self.end_block}&page={self.page}" + f"&offset={self.offset}&sort={self.sort}&apikey={self.api_key}" + ) + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + items = response.json()["result"] + result = [] + if len(items) == 0: + return [Document(page_content="")] + for item in items: + content = str(item) + metadata = {"from": item["from"], "tx_hash": item["hash"], "to": item["to"]} + result.append(Document(page_content=content, metadata=metadata)) + print(len(result)) + return result + + def getEthBalance(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=balance" + f"&address={self.account_address}&tag=latest&apikey={self.api_key}" + ) + + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + return [Document(page_content=response.json()["result"])] + + def getInternalTx(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=txlistinternal" + f"&address={self.account_address}&startblock={self.start_block}" + f"&endblock={self.end_block}&page={self.page}&offset={self.offset}" + f"&sort={self.sort}&apikey={self.api_key}" + ) + + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + items = response.json()["result"] + result = [] + if len(items) == 0: + return [Document(page_content="")] + for item in items: + content = str(item) + metadata = {"from": item["from"], "tx_hash": item["hash"], "to": item["to"]} + result.append(Document(page_content=content, metadata=metadata)) + return result + + def getERC20Tx(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=tokentx" + f"&address={self.account_address}&startblock={self.start_block}" + f"&endblock={self.end_block}&page={self.page}&offset={self.offset}" + f"&sort={self.sort}&apikey={self.api_key}" + ) + + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + items = response.json()["result"] + result = [] + if len(items) == 0: + return [Document(page_content="")] + for item in items: + content = str(item) + metadata = {"from": item["from"], "tx_hash": item["hash"], "to": item["to"]} + result.append(Document(page_content=content, metadata=metadata)) + return result + + def getERC721Tx(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=tokennfttx" + f"&address={self.account_address}&startblock={self.start_block}" + f"&endblock={self.end_block}&page={self.page}&offset={self.offset}" + f"&sort={self.sort}&apikey={self.api_key}" + ) + + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + items = response.json()["result"] + result = [] + if len(items) == 0: + return [Document(page_content="")] + for item in items: + content = str(item) + metadata = {"from": item["from"], "tx_hash": item["hash"], "to": item["to"]} + result.append(Document(page_content=content, metadata=metadata)) + return result + + def getERC1155Tx(self) -> List[Document]: + url = ( + f"https://api.etherscan.io/api?module=account&action=token1155tx" + f"&address={self.account_address}&startblock={self.start_block}" + f"&endblock={self.end_block}&page={self.page}&offset={self.offset}" + f"&sort={self.sort}&apikey={self.api_key}" + ) + + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print("Error occurred while making the request:", e) + items = response.json()["result"] + result = [] + if len(items) == 0: + return [Document(page_content="")] + for item in items: + content = str(item) + metadata = {"from": item["from"], "tx_hash": item["hash"], "to": item["to"]} + result.append(Document(page_content=content, metadata=metadata)) + return result diff --git a/libs/langchain/tests/integration_tests/document_loaders/test_etherscan.py b/libs/langchain/tests/integration_tests/document_loaders/test_etherscan.py new file mode 100644 index 0000000000..e88ea51f3d --- /dev/null +++ b/libs/langchain/tests/integration_tests/document_loaders/test_etherscan.py @@ -0,0 +1,68 @@ +import os + +import pytest + +from langchain.document_loaders import EtherscanLoader + +if "ETHERSCAN_API_KEY" in os.environ: + etherscan_key_set = True + api_key = os.environ["ETHERSCAN_API_KEY"] +else: + etherscan_key_set = False + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_normal_transaction() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address) + result = loader.load() + assert len(result) > 0, "No transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_internal_transaction() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address, filter="internal_transaction") + result = loader.load() + assert len(result) > 0, "No transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_erc20_transaction() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address, filter="erc20_transaction") + result = loader.load() + assert len(result) > 0, "No transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_erc721_transaction() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address, filter="erc721_transaction") + result = loader.load() + assert len(result) > 0, "No transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_erc1155_transaction() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address, filter="erc1155_transaction") + result = loader.load() + assert len(result) == 1, "Wrong transactions returned" + assert result[0].page_content == "", "Wrong transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_get_eth_balance() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + loader = EtherscanLoader(account_address, filter="eth_balance") + result = loader.load() + assert len(result) > 0, "No transactions returned" + + +@pytest.mark.skipif(not etherscan_key_set, reason="Etherscan API key not provided.") +def test_invalid_filter() -> None: + account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b" + with pytest.raises(ValueError) as error_invalid_filter: + EtherscanLoader(account_address, filter="internal_saction") + assert str(error_invalid_filter.value) == "Invalid filter internal_saction"