From 0fc0aa62f27542dc02505cfa61d3b2e803e697b1 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 25 Apr 2023 08:07:06 -0700 Subject: [PATCH] Harrison/blockchain docloader (#3491) Co-authored-by: Jon Saginaw --- .../examples/blockchain.ipynb | 444 ++++++++++++++++++ langchain/document_loaders/__init__.py | 2 + langchain/document_loaders/blockchain.py | 80 ++++ .../document_loaders/test_blockchain.py | 64 +++ 4 files changed, 590 insertions(+) create mode 100644 docs/modules/indexes/document_loaders/examples/blockchain.ipynb create mode 100644 langchain/document_loaders/blockchain.py create mode 100644 tests/integration_tests/document_loaders/test_blockchain.py diff --git a/docs/modules/indexes/document_loaders/examples/blockchain.ipynb b/docs/modules/indexes/document_loaders/examples/blockchain.ipynb new file mode 100644 index 00000000..0a55eb2e --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/blockchain.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "vm8vn9t8DvC_" + }, + "source": [ + "# Blockchain Document Loader" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "5WjXERXzFEhg" + }, + "source": [ + "## Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "juAmbgoWD17u" + }, + "source": [ + "The intention of this notebook is to provide a means of testing functionality in the Langchain Document Loader for Blockchain.\n", + "\n", + "Initially this Loader supports:\n", + "\n", + "\n", + "* Ethereum Maninnet, Ethereum Testnet, Polgyon Mainnet, Polygon Testnet (default is eth-mainnet)\n", + "* Alchemy's getNFTsForCollection API\n", + "\n", + "It can be extended if the community finds value in this loader. Specifically:\n", + "\n", + "* Additional APIs can be added (e.g. Tranction-related APIs)\n", + "\n", + "To run this notebook, the user will need:\n", + "\n", + "\n", + "* An OpenAI key (for OpenAI models)\n", + "* A free [Alchemy API Key](https://https://www.alchemy.com/)\n", + "\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install langchain -q" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import BlockchainDocumentLoader\n", + "from langchain.document_loaders.blockchain import BlockchainType\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "alchemyApiKey = \"get your own key from https://www.alchemy.com/\" \n", + "os.environ[\"ALCHEMY_API_KEY\"] = alchemyApiKey" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "nzuPWRaBNCMx" + }, + "source": [ + "## Create a Blockchain Document Loader" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 1: Ethereum Mainnet (default BlockchainType)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "J3LWHARC-Kn0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"{'contract': {'address': '0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d'}, 'id': {'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000000', 'tokenMetadata': {'tokenType': 'ERC721'}}, 'title': '', 'description': '', 'tokenUri': {'gateway': 'https://alchemy.mypinata.cloud/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/0', 'raw': 'ipfs://QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/0'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/eth-mainnet/415d618f5fef7bfe683e02d4653c4289', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/eth-mainnet/415d618f5fef7bfe683e02d4653c4289', 'raw': 'ipfs://QmRRPWG96cmgTn2qSzjwr2qvfNEuhunv6FNeMFGa9bx6mQ', 'format': 'png', 'bytes': 133270}], 'metadata': {'image': 'ipfs://QmRRPWG96cmgTn2qSzjwr2qvfNEuhunv6FNeMFGa9bx6mQ', 'attributes': [{'value': 'Silver Hoop', 'trait_type': 'Earring'}, {'value': 'Orange', 'trait_type': 'Background'}, {'value': 'Robot', 'trait_type': 'Fur'}, {'value': 'Striped Tee', 'trait_type': 'Clothes'}, {'value': 'Discomfort', 'trait_type': 'Mouth'}, {'value': 'X Eyes', 'trait_type': 'Eyes'}]}, 'timeLastUpdated': '2023-04-18T04:05:27.817Z', 'contractMetadata': {'name': 'BoredApeYachtClub', 'symbol': 'BAYC', 'totalSupply': '10000', 'tokenType': 'ERC721', 'contractDeployer': '0xaba7161a7fb69c88e16ed9f455ce62b791ee4d03', 'deployedBlockNumber': 12287507, 'openSea': {'floorPrice': 68.16, 'collectionName': 'Bored Ape Yacht Club', 'safelistRequestStatus': 'verified', 'imageUrl': 'https://i.seadn.io/gae/Ju9CkWtV-1Okvf45wo8UctR-M9He2PjILP0oOvxE89AyiPPGtrR3gysu1Zgy0hjd2xKIgjJJtWIc0ybj4Vd7wv8t3pxDGHoJBzDB?w=500&auto=format', 'description': 'The Bored Ape Yacht Club is a collection of 10,000 unique Bored Ape NFTs— unique digital collectibles living on the Ethereum blockchain. Your Bored Ape doubles as your Yacht Club membership card, and grants access to members-only benefits, the first of which is access to THE BATHROOM, a collaborative graffiti board. Future areas and perks can be unlocked by the community through roadmap activation. Visit www.BoredApeYachtClub.com for more details.', 'externalUrl': 'http://www.boredapeyachtclub.com/', 'twitterUsername': 'BoredApeYC', 'discordUrl': 'https://discord.gg/3P5K3dzgdB', 'lastIngestedAt': '2023-03-21T03:54:33.000Z'}}}\", metadata={'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000000'}),\n", + " Document(page_content=\"{'contract': {'address': '0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d'}, 'id': {'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000001', 'tokenMetadata': {'tokenType': 'ERC721'}}, 'title': '', 'description': '', 'tokenUri': {'gateway': 'https://alchemy.mypinata.cloud/ipfs/QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/1', 'raw': 'ipfs://QmeSjSinHpPnmXmspMjwiXyN6zS4E9zccariGR3jxcaWtq/1'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/eth-mainnet/65558a4d0c5b0c56fbc50bf03f55e3fa', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/eth-mainnet/65558a4d0c5b0c56fbc50bf03f55e3fa', 'raw': 'ipfs://QmPbxeGcXhYQQNgsC6a36dDyYUcHgMLnGKnF8pVFmGsvqi', 'format': 'png', 'bytes': 171425}], 'metadata': {'image': 'ipfs://QmPbxeGcXhYQQNgsC6a36dDyYUcHgMLnGKnF8pVFmGsvqi', 'attributes': [{'value': 'Grin', 'trait_type': 'Mouth'}, {'value': 'Vietnam Jacket', 'trait_type': 'Clothes'}, {'value': 'Orange', 'trait_type': 'Background'}, {'value': 'Blue Beams', 'trait_type': 'Eyes'}, {'value': 'Robot', 'trait_type': 'Fur'}]}, 'timeLastUpdated': '2023-04-24T04:37:37.738Z', 'contractMetadata': {'name': 'BoredApeYachtClub', 'symbol': 'BAYC', 'totalSupply': '10000', 'tokenType': 'ERC721', 'contractDeployer': '0xaba7161a7fb69c88e16ed9f455ce62b791ee4d03', 'deployedBlockNumber': 12287507, 'openSea': {'floorPrice': 68.16, 'collectionName': 'Bored Ape Yacht Club', 'safelistRequestStatus': 'verified', 'imageUrl': 'https://i.seadn.io/gae/Ju9CkWtV-1Okvf45wo8UctR-M9He2PjILP0oOvxE89AyiPPGtrR3gysu1Zgy0hjd2xKIgjJJtWIc0ybj4Vd7wv8t3pxDGHoJBzDB?w=500&auto=format', 'description': 'The Bored Ape Yacht Club is a collection of 10,000 unique Bored Ape NFTs— unique digital collectibles living on the Ethereum blockchain. Your Bored Ape doubles as your Yacht Club membership card, and grants access to members-only benefits, the first of which is access to THE BATHROOM, a collaborative graffiti board. Future areas and perks can be unlocked by the community through roadmap activation. Visit www.BoredApeYachtClub.com for more details.', 'externalUrl': 'http://www.boredapeyachtclub.com/', 'twitterUsername': 'BoredApeYC', 'discordUrl': 'https://discord.gg/3P5K3dzgdB', 'lastIngestedAt': '2023-03-21T03:54:33.000Z'}}}\", metadata={'tokenId': '0x0000000000000000000000000000000000000000000000000000000000000001'})]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "contractAddress = \"0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d\" # Bored Ape Yacht Club contract address\n", + "\n", + "blockchainType = BlockchainType.ETH_MAINNET #default value, optional parameter\n", + "\n", + "blockchainLoader = BlockchainDocumentLoader(contractAddress)\n", + "\n", + "nfts = blockchainLoader.load()\n", + "\n", + "nfts[:2]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 2: Polygon Mainnet" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"{'contract': {'address': '0x448676ffcd0adf2d85c1f0565e8dde6924a9a7d9'}, 'id': {'tokenId': '0x01', 'tokenMetadata': {'tokenType': 'ERC1155'}}, 'title': 'Wyatt Horton #0001', 'description': 'A sleepy capybara', 'tokenUri': {'gateway': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/1.json', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/1.json'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/matic-mainnet/9085e06ff9f6c9074de91801d1c72d26', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/matic-mainnet/9085e06ff9f6c9074de91801d1c72d26', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/1.png', 'format': 'png', 'bytes': 769622}], 'metadata': {'name': 'Wyatt Horton #0001', 'description': 'A sleepy capybara', 'image': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/1.png', 'attributes': [{'value': 'Avatar', 'trait_type': 'Type'}, {'value': 'Animal', 'trait_type': 'Category'}, {'value': 'Capybara', 'trait_type': 'Class'}, {'value': 'Fall 2022', 'trait_type': 'Collection'}, {'value': 'Furry', 'trait_type': 'Feature'}]}, 'timeLastUpdated': '2023-04-20T14:38:24.947Z', 'contractMetadata': {'name': 'Smoothstack - Avatars', 'symbol': 'SMTH', 'tokenType': 'ERC1155', 'contractDeployer': '0x23075b2523c6563b06920a302a8be4f90ef6e974', 'deployedBlockNumber': 34752389, 'openSea': {'lastIngestedAt': '2023-04-17T20:59:42.000Z'}}}\", metadata={'tokenId': '0x01'}),\n", + " Document(page_content=\"{'contract': {'address': '0x448676ffcd0adf2d85c1f0565e8dde6924a9a7d9'}, 'id': {'tokenId': '0x02', 'tokenMetadata': {'tokenType': 'ERC1155'}}, 'title': 'Dylan Leisler #0002', 'description': 'A chipper cat with a big, red bowtie', 'tokenUri': {'gateway': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/2.json', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/metadata/2.json'}, 'media': [{'gateway': 'https://nft-cdn.alchemy.com/matic-mainnet/67c3c7ccef44b32bf2ce758e8e73dbcd', 'thumbnail': 'https://res.cloudinary.com/alchemyapi/image/upload/thumbnailv2/matic-mainnet/67c3c7ccef44b32bf2ce758e8e73dbcd', 'raw': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/2.png', 'format': 'png', 'bytes': 1187749}], 'metadata': {'name': 'Dylan Leisler #0002', 'description': 'A chipper cat with a big, red bowtie', 'image': 'https://storage.googleapis.com/minted-nfts/smoothstack/avatars/images/2.png', 'attributes': [{'value': 'Avatar', 'trait_type': 'Type'}, {'value': 'Animal', 'trait_type': 'Category'}, {'value': 'Cat', 'trait_type': 'Class'}, {'value': 'Fall 2022', 'trait_type': 'Collection'}, {'value': 'Red Bowtie', 'trait_type': 'Feature'}]}, 'timeLastUpdated': '2023-04-23T13:38:29.316Z', 'contractMetadata': {'name': 'Smoothstack - Avatars', 'symbol': 'SMTH', 'tokenType': 'ERC1155', 'contractDeployer': '0x23075b2523c6563b06920a302a8be4f90ef6e974', 'deployedBlockNumber': 34752389, 'openSea': {'lastIngestedAt': '2023-04-17T20:59:42.000Z'}}}\", metadata={'tokenId': '0x02'})]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "contractAddress = \"0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9\" # Polygon Mainnet contract address\n", + "\n", + "blockchainType = BlockchainType.POLYGON_MAINNET \n", + "\n", + "blockchainLoader = BlockchainDocumentLoader(contractAddress, blockchainType)\n", + "\n", + "nfts = blockchainLoader.load()\n", + "\n", + "nfts[:2]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Optional) Using the Blockchain Document Loader" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "_PGkFfMCB8J3" + }, + "source": [ + "### Setup Splitter and Index" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install sentence_transformers chromadb openai tiktoken -q" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "BwxxopOCCABh" + }, + "outputs": [], + "source": [ + "from langchain.indexes import VectorstoreIndexCreator\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JE_myAulCDSZ", + "outputId": "99e16b6a-03b4-4e67-d4b4-9dd611a866ef" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NUMBER OF DOCUMENTS: 424\n" + ] + } + ], + "source": [ + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)\n", + "\n", + "docs = text_splitter.split_documents(nfts)\n", + "print(\"NUMBER OF DOCUMENTS: \", len(docs))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "id": "d83yFuAuCKQS" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using embedded DuckDB without persistence: data will be transient\n" + ] + } + ], + "source": [ + "index = VectorstoreIndexCreator(\n", + " embedding=HuggingFaceEmbeddings(),\n", + " text_splitter=text_splitter).from_loaders([blockchainLoader])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "y0VfObeXDEXB" + }, + "source": [ + "## Setup Models and Chains" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "openAiKey = \"put OpenAI key here\"\n", + "os.environ[\"OPENAI_API_KEY\"] = openAiKey" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "hiNjDzP9C4pA" + }, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.llms import OpenAI" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "u-xDlKPaC_xg" + }, + "source": [ + "### Retrieval Chain" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "id": "BqP00JovC9R4" + }, + "outputs": [], + "source": [ + "llmOpenAI = OpenAI()\n", + "\n", + "chainQA = RetrievalQA.from_chain_type(llm=llmOpenAI, \n", + " chain_type=\"map_reduce\",\n", + " retriever=index.vectorstore.as_retriever(), \n", + " verbose=True,\n", + " input_key=\"question\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + }, + "id": "2Y3cVVKZDVNq", + "outputId": "dfeea416-5193-47cf-e9dc-c17a5c1cd780" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "' Popular attributes include \"Avatar\" (Type), \"Character\" (Category), and \"Human\" or \"Wizard\" (Class).'" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chainQA.run(\"What are some of the popular attributes?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + }, + "id": "7o6ArPo9DXbz", + "outputId": "b1f8ad43-27c7-4cdb-95a7-8c8bd6381c5a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "' There are 10,000 unique Bored Ape NFTs.'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chainQA.run(\"How many NFTs are there?\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "5WjXERXzFEhg" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index d2b9205c..d73ae89a 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -12,6 +12,7 @@ from langchain.document_loaders.azure_blob_storage_file import ( from langchain.document_loaders.bigquery import BigQueryLoader from langchain.document_loaders.bilibili import BiliBiliLoader from langchain.document_loaders.blackboard import BlackboardLoader +from langchain.document_loaders.blockchain import BlockchainDocumentLoader from langchain.document_loaders.chatgpt import ChatGPTLoader from langchain.document_loaders.college_confidential import CollegeConfidentialLoader from langchain.document_loaders.confluence import ConfluenceLoader @@ -162,4 +163,5 @@ __all__ = [ "PythonLoader", "ChatGPTLoader", "HuggingFaceDatasetLoader", + "BlockchainDocumentLoader", ] diff --git a/langchain/document_loaders/blockchain.py b/langchain/document_loaders/blockchain.py new file mode 100644 index 00000000..b61c0c17 --- /dev/null +++ b/langchain/document_loaders/blockchain.py @@ -0,0 +1,80 @@ +import os +import re +from enum import Enum +from typing import List + +import requests + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class BlockchainType(Enum): + ETH_MAINNET = "eth-mainnet" + ETH_GOERLI = "eth-goerli" + POLYGON_MAINNET = "polygon-mainnet" + POLYGON_MUMBAI = "polygon-mumbai" + + +class BlockchainDocumentLoader(BaseLoader): + """Loads elements from a blockchain smart contract into Langchain documents. + + The supported blockchains are: Ethereum mainnet, Ethereum Goerli testnet, + Polygon mainnet, and Polygon Mumbai testnet. + + If no BlockchainType is specified, the default is Ethereum mainnet. + + The Loader uses the Alchemy API to interact with the blockchain. + + ALCHEMY_API_KEY environment variable must be set to use this loader. + + Future versions of this loader can: + - Support additional Alchemy APIs (e.g. getTransactions, etc.) + """ + + def __init__( + self, + contract_address: str, + blockchainType: BlockchainType = BlockchainType.ETH_MAINNET, + api_key: str = "docs-demo", + startToken: int = 0, + ): + self.contract_address = contract_address + self.blockchainType = blockchainType.value + self.api_key = os.environ.get("ALCHEMY_API_KEY") or api_key + self.startToken = startToken + + if not self.api_key: + raise ValueError("Alchemy API key not provided.") + + if not re.match(r"^0x[a-fA-F0-9]{40}$", self.contract_address): + raise ValueError(f"Invalid contract address {self.contract_address}") + + def load(self) -> List[Document]: + url = ( + f"https://{self.blockchainType}.g.alchemy.com/nft/v2/" + f"{self.api_key}/getNFTsForCollection?withMetadata=" + f"True&contractAddress={self.contract_address}" + f"&startToken={self.startToken}" + ) + + response = requests.get(url) + + if response.status_code != 200: + raise ValueError(f"Request failed with status code {response.status_code}") + + items = response.json()["nfts"] + + if not (items): + raise ValueError( + f"No NFTs found for contract address {self.contract_address}" + ) + + result = [] + + for item in items: + content = str(item) + tokenId = item["id"]["tokenId"] + metadata = {"tokenId": tokenId} + result.append(Document(page_content=content, metadata=metadata)) + return result diff --git a/tests/integration_tests/document_loaders/test_blockchain.py b/tests/integration_tests/document_loaders/test_blockchain.py new file mode 100644 index 00000000..ca40cbb0 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_blockchain.py @@ -0,0 +1,64 @@ +import os + +import pytest + +from langchain.document_loaders import BlockchainDocumentLoader +from langchain.document_loaders.blockchain import BlockchainType + +if "ALCHEMY_API_KEY" in os.environ: + alchemyKeySet = True + apiKey = os.environ["ALCHEMY_API_KEY"] +else: + alchemyKeySet = False + + +@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.") +def test_get_nfts_valid_contract() -> None: + contract_address = ( + "0x1a92f7381b9f03921564a437210bb9396471050c" # CoolCats contract address + ) + result = BlockchainDocumentLoader(contract_address).load() + assert len(result) > 0, "No NFTs returned" + + +@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.") +def test_get_nfts_with_pagination() -> None: + contract_address = ( + "0x1a92f7381b9f03921564a437210bb9396471050c" # CoolCats contract address + ) + startToken = 20 + + result = BlockchainDocumentLoader( + contract_address, + BlockchainType.ETH_MAINNET, + api_key=apiKey, + startToken=startToken, + ).load() + + assert len(result) > 0, "No NFTs returned" + + +@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.") +def test_get_nfts_polygon() -> None: + contract_address = ( + "0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9" # Polygon contract address + ) + result = BlockchainDocumentLoader( + contract_address, BlockchainType.POLYGON_MAINNET + ).load() + assert len(result) > 0, "No NFTs returned" + + +@pytest.mark.skipif(not alchemyKeySet, reason="Alchemy API key not provided.") +def test_get_nfts_invalid_contract() -> None: + contract_address = ( + "0x111D4e82EA7eCA7F62c3fdf6D39A541be95Bf111" # Invalid contract address + ) + + with pytest.raises(ValueError) as error_NoNfts: + BlockchainDocumentLoader(contract_address).load() + + assert ( + str(error_NoNfts.value) + == "No NFTs found for contract address " + contract_address + )