mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Harrison/blockchain docloader (#3491)
Co-authored-by: Jon Saginaw <saginawj@users.noreply.github.com>
This commit is contained in:
parent
bee59b4689
commit
0fc0aa62f2
444
docs/modules/indexes/document_loaders/examples/blockchain.ipynb
Normal file
444
docs/modules/indexes/document_loaders/examples/blockchain.ipynb
Normal file
@ -0,0 +1,444 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "vm8vn9t8DvC_"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Blockchain Document Loader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "5WjXERXzFEhg"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Overview"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "juAmbgoWD17u"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"The intention of this notebook is to provide a means of testing functionality in the Langchain Document Loader for Blockchain.\n",
|
||||||
|
"\n",
|
||||||
|
"Initially this Loader supports:\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"* Ethereum Maninnet, Ethereum Testnet, Polgyon Mainnet, Polygon Testnet (default is eth-mainnet)\n",
|
||||||
|
"* Alchemy's getNFTsForCollection API\n",
|
||||||
|
"\n",
|
||||||
|
"It can be extended if the community finds value in this loader. Specifically:\n",
|
||||||
|
"\n",
|
||||||
|
"* Additional APIs can be added (e.g. Tranction-related APIs)\n",
|
||||||
|
"\n",
|
||||||
|
"To run this notebook, the user will need:\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"* An OpenAI key (for OpenAI models)\n",
|
||||||
|
"* A free [Alchemy API Key](https://https://www.alchemy.com/)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 48,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%pip install langchain -q"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.document_loaders import BlockchainDocumentLoader\n",
|
||||||
|
"from langchain.document_loaders.blockchain import BlockchainType\n",
|
||||||
|
"import os"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 50,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"alchemyApiKey = \"get your own key from https://www.alchemy.com/\" \n",
|
||||||
|
"os.environ[\"ALCHEMY_API_KEY\"] = alchemyApiKey"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "nzuPWRaBNCMx"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Create a Blockchain Document Loader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Option 1: Ethereum Mainnet (default BlockchainType)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {
|
||||||
|
"id": "J3LWHARC-Kn0"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content=\"{'contract': {'address': '0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d'}, 'id': {'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000000', 'tokenMetadata': {'tokenType': 'ERC721'}}, 'title': '', 'description': '', 'tokenUri': {'gateway': 'https://alchemy.mypinata.cloud/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/0', 'raw': 'ipfs://QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/0'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/eth-mainnet/415d618f5fef7bfe683e02d4653c4289', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/eth-mainnet/415d618f5fef7bfe683e02d4653c4289', 'raw': 'ipfs://QmRRPWG96cmgTn2qSzjwr2qvfNEuhunv6FNeMFGa9bx6mQ', 'format': 'png', 'bytes': 133270}], 'metadata': {'image': 'ipfs://QmRRPWG96cmgTn2qSzjwr2qvfNEuhunv6FNeMFGa9bx6mQ', 'attributes': [{'value': 'Silver Hoop', 'trait_type': 'Earring'}, {'value': 'Orange', 'trait_type': 'Background'}, {'value': 'Robot', 'trait_type': 'Fur'}, {'value': 'Striped Tee', 'trait_type': 'Clothes'}, {'value': 'Discomfort', 'trait_type': 'Mouth'}, {'value': 'X Eyes', 'trait_type': 'Eyes'}]}, 'timeLastUpdated': '2023-04-18T04:05:27.817Z', 'contractMetadata': {'name': 'BoredApeYachtClub', 'symbol': 'BAYC', 'totalSupply': '10000', 'tokenType': 'ERC721', 'contractDeployer': '0xaba7161a7fb69c88e16ed9f455ce62b791ee4d03', 'deployedBlockNumber': 12287507, 'openSea': {'floorPrice': 68.16, 'collectionName': 'Bored Ape Yacht Club', 'safelistRequestStatus': 'verified', 'imageUrl': 'https://i.seadn.io/gae/Ju9CkWtV-1Okvf45wo8UctR-M9He2PjILP0oOvxE89AyiPPGtrR3gysu1Zgy0hjd2xKIgjJJtWIc0ybj4Vd7wv8t3pxDGHoJBzDB?w=500&auto=format', 'description': 'The Bored Ape Yacht Club is a collection of 10,000 unique Bored Ape NFTs— unique digital collectibles living on the Ethereum blockchain. Your Bored Ape doubles as your Yacht Club membership card, and grants access to members-only benefits, the first of which is access to THE BATHROOM, a collaborative graffiti board. Future areas and perks can be unlocked by the community through roadmap activation. Visit www.BoredApeYachtClub.com for more details.', 'externalUrl': 'http://www.boredapeyachtclub.com/', 'twitterUsername': 'BoredApeYC', 'discordUrl': 'https://discord.gg/3P5K3dzgdB', 'lastIngestedAt': '2023-03-21T03:54:33.000Z'}}}\", metadata={'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000000'}),\n",
|
||||||
|
" Document(page_content=\"{'contract': {'address': '0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d'}, 'id': {'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000001', 'tokenMetadata': {'tokenType': 'ERC721'}}, 'title': '', 'description': '', 'tokenUri': {'gateway': 'https://alchemy.mypinata.cloud/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/1', 'raw': 'ipfs://QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/1'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/eth-mainnet/65558a4d0c5b0c56fbc50bf03f55e3fa', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/eth-mainnet/65558a4d0c5b0c56fbc50bf03f55e3fa', 'raw': 'ipfs://QmPbxeGcXhYQQNgsC6a36dDyYUcHgMLnGKnF8pVFmGsvqi', 'format': 'png', 'bytes': 171425}], 'metadata': {'image': 'ipfs://QmPbxeGcXhYQQNgsC6a36dDyYUcHgMLnGKnF8pVFmGsvqi', 'attributes': [{'value': 'Grin', 'trait_type': 'Mouth'}, {'value': 'Vietnam Jacket', 'trait_type': 'Clothes'}, {'value': 'Orange', 'trait_type': 'Background'}, {'value': 'Blue Beams', 'trait_type': 'Eyes'}, {'value': 'Robot', 'trait_type': 'Fur'}]}, 'timeLastUpdated': '2023-04-24T04:37:37.738Z', 'contractMetadata': {'name': 'BoredApeYachtClub', 'symbol': 'BAYC', 'totalSupply': '10000', 'tokenType': 'ERC721', 'contractDeployer': '0xaba7161a7fb69c88e16ed9f455ce62b791ee4d03', 'deployedBlockNumber': 12287507, 'openSea': {'floorPrice': 68.16, 'collectionName': 'Bored Ape Yacht Club', 'safelistRequestStatus': 'verified', 'imageUrl': 'https://i.seadn.io/gae/Ju9CkWtV-1Okvf45wo8UctR-M9He2PjILP0oOvxE89AyiPPGtrR3gysu1Zgy0hjd2xKIgjJJtWIc0ybj4Vd7wv8t3pxDGHoJBzDB?w=500&auto=format', 'description': 'The Bored Ape Yacht Club is a collection of 10,000 unique Bored Ape NFTs— unique digital collectibles living on the Ethereum blockchain. Your Bored Ape doubles as your Yacht Club membership card, and grants access to members-only benefits, the first of which is access to THE BATHROOM, a collaborative graffiti board. Future areas and perks can be unlocked by the community through roadmap activation. Visit www.BoredApeYachtClub.com for more details.', 'externalUrl': 'http://www.boredapeyachtclub.com/', 'twitterUsername': 'BoredApeYC', 'discordUrl': 'https://discord.gg/3P5K3dzgdB', 'lastIngestedAt': '2023-03-21T03:54:33.000Z'}}}\", metadata={'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000001'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"contractAddress = \"0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d\" # Bored Ape Yacht Club contract address\n",
|
||||||
|
"\n",
|
||||||
|
"blockchainType = BlockchainType.ETH_MAINNET #default value, optional parameter\n",
|
||||||
|
"\n",
|
||||||
|
"blockchainLoader = BlockchainDocumentLoader(contractAddress)\n",
|
||||||
|
"\n",
|
||||||
|
"nfts = blockchainLoader.load()\n",
|
||||||
|
"\n",
|
||||||
|
"nfts[:2]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Option 2: Polygon Mainnet"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 36,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content=\"{'contract': {'address': '0x448676ffcd0adf2d85c1f0565e8dde6924a9a7d9'}, 'id': {'tokenId': '0x01', 'tokenMetadata': {'tokenType': 'ERC1155'}}, 'title': 'Wyatt Horton #0001', 'description': 'A sleepy capybara', 'tokenUri': {'gateway': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/1.json', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/1.json'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/matic-mainnet/9085e06ff9f6c9074de91801d1c72d26', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/matic-mainnet/9085e06ff9f6c9074de91801d1c72d26', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/1.png', 'format': 'png', 'bytes': 769622}], 'metadata': {'name': 'Wyatt Horton #0001', 'description': 'A sleepy capybara', 'image': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/1.png', 'attributes': [{'value': 'Avatar', 'trait_type': 'Type'}, {'value': 'Animal', 'trait_type': 'Category'}, {'value': 'Capybara', 'trait_type': 'Class'}, {'value': 'Fall 2022', 'trait_type': 'Collection'}, {'value': 'Furry', 'trait_type': 'Feature'}]}, 'timeLastUpdated': '2023-04-20T14:38:24.947Z', 'contractMetadata': {'name': 'Smoothstack - Avatars', 'symbol': 'SMTH', 'tokenType': 'ERC1155', 'contractDeployer': '0x23075b2523c6563b06920a302a8be4f90ef6e974', 'deployedBlockNumber': 34752389, 'openSea': {'lastIngestedAt': '2023-04-17T20:59:42.000Z'}}}\", metadata={'tokenId': '0x01'}),\n",
|
||||||
|
" Document(page_content=\"{'contract': {'address': '0x448676ffcd0adf2d85c1f0565e8dde6924a9a7d9'}, 'id': {'tokenId': '0x02', 'tokenMetadata': {'tokenType': 'ERC1155'}}, 'title': 'Dylan Leisler #0002', 'description': 'A chipper cat with a big, red bowtie', 'tokenUri': {'gateway': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/2.json', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/2.json'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/matic-mainnet/67c3c7ccef44b32bf2ce758e8e73dbcd', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/matic-mainnet/67c3c7ccef44b32bf2ce758e8e73dbcd', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/2.png', 'format': 'png', 'bytes': 1187749}], 'metadata': {'name': 'Dylan Leisler #0002', 'description': 'A chipper cat with a big, red bowtie', 'image': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/2.png', 'attributes': [{'value': 'Avatar', 'trait_type': 'Type'}, {'value': 'Animal', 'trait_type': 'Category'}, {'value': 'Cat', 'trait_type': 'Class'}, {'value': 'Fall 2022', 'trait_type': 'Collection'}, {'value': 'Red Bowtie', 'trait_type': 'Feature'}]}, 'timeLastUpdated': '2023-04-23T13:38:29.316Z', 'contractMetadata': {'name': 'Smoothstack - Avatars', 'symbol': 'SMTH', 'tokenType': 'ERC1155', 'contractDeployer': '0x23075b2523c6563b06920a302a8be4f90ef6e974', 'deployedBlockNumber': 34752389, 'openSea': {'lastIngestedAt': '2023-04-17T20:59:42.000Z'}}}\", metadata={'tokenId': '0x02'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 36,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"contractAddress = \"0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9\" # Polygon Mainnet contract address\n",
|
||||||
|
"\n",
|
||||||
|
"blockchainType = BlockchainType.POLYGON_MAINNET \n",
|
||||||
|
"\n",
|
||||||
|
"blockchainLoader = BlockchainDocumentLoader(contractAddress, blockchainType)\n",
|
||||||
|
"\n",
|
||||||
|
"nfts = blockchainLoader.load()\n",
|
||||||
|
"\n",
|
||||||
|
"nfts[:2]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## (Optional) Using the Blockchain Document Loader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "_PGkFfMCB8J3"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Setup Splitter and Index"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 37,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%pip install sentence_transformers chromadb openai tiktoken -q"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 38,
|
||||||
|
"metadata": {
|
||||||
|
"id": "BwxxopOCCABh"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.indexes import VectorstoreIndexCreator\n",
|
||||||
|
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
||||||
|
"from langchain.text_splitter import RecursiveCharacterTextSplitter"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 39,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "JE_myAulCDSZ",
|
||||||
|
"outputId": "99e16b6a-03b4-4e67-d4b4-9dd611a866ef"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"NUMBER OF DOCUMENTS: 424\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)\n",
|
||||||
|
"\n",
|
||||||
|
"docs = text_splitter.split_documents(nfts)\n",
|
||||||
|
"print(\"NUMBER OF DOCUMENTS: \", len(docs))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 40,
|
||||||
|
"metadata": {
|
||||||
|
"id": "d83yFuAuCKQS"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"index = VectorstoreIndexCreator(\n",
|
||||||
|
" embedding=HuggingFaceEmbeddings(),\n",
|
||||||
|
" text_splitter=text_splitter).from_loaders([blockchainLoader])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "y0VfObeXDEXB"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Setup Models and Chains"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 42,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"openAiKey = \"put OpenAI key here\"\n",
|
||||||
|
"os.environ[\"OPENAI_API_KEY\"] = openAiKey"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"metadata": {
|
||||||
|
"id": "hiNjDzP9C4pA"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.chains import RetrievalQA\n",
|
||||||
|
"from langchain.llms import OpenAI"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "u-xDlKPaC_xg"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Retrieval Chain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 43,
|
||||||
|
"metadata": {
|
||||||
|
"id": "BqP00JovC9R4"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"llmOpenAI = OpenAI()\n",
|
||||||
|
"\n",
|
||||||
|
"chainQA = RetrievalQA.from_chain_type(llm=llmOpenAI, \n",
|
||||||
|
" chain_type=\"map_reduce\",\n",
|
||||||
|
" retriever=index.vectorstore.as_retriever(), \n",
|
||||||
|
" verbose=True,\n",
|
||||||
|
" input_key=\"question\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 122
|
||||||
|
},
|
||||||
|
"id": "2Y3cVVKZDVNq",
|
||||||
|
"outputId": "dfeea416-5193-47cf-e9dc-c17a5c1cd780"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"' Popular attributes include \"Avatar\" (Type), \"Character\" (Category), and \"Human\" or \"Wizard\" (Class).'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"chainQA.run(\"What are some of the popular attributes?\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 122
|
||||||
|
},
|
||||||
|
"id": "7o6ArPo9DXbz",
|
||||||
|
"outputId": "b1f8ad43-27c7-4cdb-95a7-8c8bd6381c5a"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"' There are 10,000 unique Bored Ape NFTs.'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 32,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"chainQA.run(\"How many NFTs are there?\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"collapsed_sections": [
|
||||||
|
"5WjXERXzFEhg"
|
||||||
|
],
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0
|
||||||
|
}
|
@ -12,6 +12,7 @@ from langchain.document_loaders.azure_blob_storage_file import (
|
|||||||
from langchain.document_loaders.bigquery import BigQueryLoader
|
from langchain.document_loaders.bigquery import BigQueryLoader
|
||||||
from langchain.document_loaders.bilibili import BiliBiliLoader
|
from langchain.document_loaders.bilibili import BiliBiliLoader
|
||||||
from langchain.document_loaders.blackboard import BlackboardLoader
|
from langchain.document_loaders.blackboard import BlackboardLoader
|
||||||
|
from langchain.document_loaders.blockchain import BlockchainDocumentLoader
|
||||||
from langchain.document_loaders.chatgpt import ChatGPTLoader
|
from langchain.document_loaders.chatgpt import ChatGPTLoader
|
||||||
from langchain.document_loaders.college_confidential import CollegeConfidentialLoader
|
from langchain.document_loaders.college_confidential import CollegeConfidentialLoader
|
||||||
from langchain.document_loaders.confluence import ConfluenceLoader
|
from langchain.document_loaders.confluence import ConfluenceLoader
|
||||||
@ -162,4 +163,5 @@ __all__ = [
|
|||||||
"PythonLoader",
|
"PythonLoader",
|
||||||
"ChatGPTLoader",
|
"ChatGPTLoader",
|
||||||
"HuggingFaceDatasetLoader",
|
"HuggingFaceDatasetLoader",
|
||||||
|
"BlockchainDocumentLoader",
|
||||||
]
|
]
|
||||||
|
80
langchain/document_loaders/blockchain.py
Normal file
80
langchain/document_loaders/blockchain.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
|
||||||
|
class BlockchainType(Enum):
|
||||||
|
ETH_MAINNET = "eth-mainnet"
|
||||||
|
ETH_GOERLI = "eth-goerli"
|
||||||
|
POLYGON_MAINNET = "polygon-mainnet"
|
||||||
|
POLYGON_MUMBAI = "polygon-mumbai"
|
||||||
|
|
||||||
|
|
||||||
|
class BlockchainDocumentLoader(BaseLoader):
|
||||||
|
"""Loads elements from a blockchain smart contract into Langchain documents.
|
||||||
|
|
||||||
|
The supported blockchains are: Ethereum mainnet, Ethereum Goerli testnet,
|
||||||
|
Polygon mainnet, and Polygon Mumbai testnet.
|
||||||
|
|
||||||
|
If no BlockchainType is specified, the default is Ethereum mainnet.
|
||||||
|
|
||||||
|
The Loader uses the Alchemy API to interact with the blockchain.
|
||||||
|
|
||||||
|
ALCHEMY_API_KEY environment variable must be set to use this loader.
|
||||||
|
|
||||||
|
Future versions of this loader can:
|
||||||
|
- Support additional Alchemy APIs (e.g. getTransactions, etc.)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
contract_address: str,
|
||||||
|
blockchainType: BlockchainType = BlockchainType.ETH_MAINNET,
|
||||||
|
api_key: str = "docs-demo",
|
||||||
|
startToken: int = 0,
|
||||||
|
):
|
||||||
|
self.contract_address = contract_address
|
||||||
|
self.blockchainType = blockchainType.value
|
||||||
|
self.api_key = os.environ.get("ALCHEMY_API_KEY") or api_key
|
||||||
|
self.startToken = startToken
|
||||||
|
|
||||||
|
if not self.api_key:
|
||||||
|
raise ValueError("Alchemy API key not provided.")
|
||||||
|
|
||||||
|
if not re.match(r"^0x[a-fA-F0-9]{40}$", self.contract_address):
|
||||||
|
raise ValueError(f"Invalid contract address {self.contract_address}")
|
||||||
|
|
||||||
|
def load(self) -> List[Document]:
|
||||||
|
url = (
|
||||||
|
f"https://{self.blockchainType}.g.alchemy.com/nft/v2/"
|
||||||
|
f"{self.api_key}/getNFTsForCollection?withMetadata="
|
||||||
|
f"True&contractAddress={self.contract_address}"
|
||||||
|
f"&startToken={self.startToken}"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = requests.get(url)
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
raise ValueError(f"Request failed with status code {response.status_code}")
|
||||||
|
|
||||||
|
items = response.json()["nfts"]
|
||||||
|
|
||||||
|
if not (items):
|
||||||
|
raise ValueError(
|
||||||
|
f"No NFTs found for contract address {self.contract_address}"
|
||||||
|
)
|
||||||
|
|
||||||
|
result = []
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
content = str(item)
|
||||||
|
tokenId = item["id"]["tokenId"]
|
||||||
|
metadata = {"tokenId": tokenId}
|
||||||
|
result.append(Document(page_content=content, metadata=metadata))
|
||||||
|
return result
|
64
tests/integration_tests/document_loaders/test_blockchain.py
Normal file
64
tests/integration_tests/document_loaders/test_blockchain.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.document_loaders import BlockchainDocumentLoader
|
||||||
|
from langchain.document_loaders.blockchain import BlockchainType
|
||||||
|
|
||||||
|
if "ALCHEMY_API_KEY" in os.environ:
|
||||||
|
alchemyKeySet = True
|
||||||
|
apiKey = os.environ["ALCHEMY_API_KEY"]
|
||||||
|
else:
|
||||||
|
alchemyKeySet = False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.")
|
||||||
|
def test_get_nfts_valid_contract() -> None:
|
||||||
|
contract_address = (
|
||||||
|
"0x1a92f7381b9f03921564a437210bb9396471050c" # CoolCats contract address
|
||||||
|
)
|
||||||
|
result = BlockchainDocumentLoader(contract_address).load()
|
||||||
|
assert len(result) > 0, "No NFTs returned"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.")
|
||||||
|
def test_get_nfts_with_pagination() -> None:
|
||||||
|
contract_address = (
|
||||||
|
"0x1a92f7381b9f03921564a437210bb9396471050c" # CoolCats contract address
|
||||||
|
)
|
||||||
|
startToken = 20
|
||||||
|
|
||||||
|
result = BlockchainDocumentLoader(
|
||||||
|
contract_address,
|
||||||
|
BlockchainType.ETH_MAINNET,
|
||||||
|
api_key=apiKey,
|
||||||
|
startToken=startToken,
|
||||||
|
).load()
|
||||||
|
|
||||||
|
assert len(result) > 0, "No NFTs returned"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.")
|
||||||
|
def test_get_nfts_polygon() -> None:
|
||||||
|
contract_address = (
|
||||||
|
"0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9" # Polygon contract address
|
||||||
|
)
|
||||||
|
result = BlockchainDocumentLoader(
|
||||||
|
contract_address, BlockchainType.POLYGON_MAINNET
|
||||||
|
).load()
|
||||||
|
assert len(result) > 0, "No NFTs returned"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.")
|
||||||
|
def test_get_nfts_invalid_contract() -> None:
|
||||||
|
contract_address = (
|
||||||
|
"0x111D4e82EA7eCA7F62c3fdf6D39A541be95Bf111" # Invalid contract address
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError) as error_NoNfts:
|
||||||
|
BlockchainDocumentLoader(contract_address).load()
|
||||||
|
|
||||||
|
assert (
|
||||||
|
str(error_NoNfts.value)
|
||||||
|
== "No NFTs found for contract address " + contract_address
|
||||||
|
)
|
Loading…
Reference in New Issue
Block a user