From f04faf84963f248ab40cd8b0a569b61b6f967b27 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 1 May 2023 20:56:56 -0700 Subject: [PATCH] Harrison/spreedly (#3937) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Esmit PĂ©rez --- .../document_loaders/examples/spreedly.ipynb | 129 ++++++++++++++++++ langchain/document_loaders/__init__.py | 2 + langchain/document_loaders/spreedly.py | 47 +++++++ .../document_loaders/test_spreedly.py | 11 ++ 4 files changed, 189 insertions(+) create mode 100644 docs/modules/indexes/document_loaders/examples/spreedly.ipynb create mode 100644 langchain/document_loaders/spreedly.py create mode 100644 tests/integration_tests/document_loaders/test_spreedly.py diff --git a/docs/modules/indexes/document_loaders/examples/spreedly.ipynb b/docs/modules/indexes/document_loaders/examples/spreedly.ipynb new file mode 100644 index 00000000..8fdf45f6 --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/spreedly.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Spreedly\n", + "\n", + "This notebook covers how to load data from the [Spreedly REST API](https://docs.spreedly.com/reference/api/v1/) into a format that can be ingested into LangChain, along with example usage for vectorization.\n", + "\n", + "Note: this notebook assumes the following packages are installed: `openai`, `chromadb`, and `tiktoken`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from langchain.document_loaders import SpreedlyLoader\n", + "from langchain.indexes import VectorstoreIndexCreator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spreedly API requires an access token, which can be found inside the Spreedly Admin Console.\n", + "\n", + "This document loader does not currently support pagination, nor access to more complex objects which require additional parameters. It also requires a `resource` option which defines what objects you want to load.\n", + "\n", + "Following resources are available:\n", + "- `gateways_options`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-supported-gateways)\n", + "- `gateways`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-created-gateways)\n", + "- `receivers_options`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-supported-receivers)\n", + "- `receivers`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-created-receivers)\n", + "- `payment_methods`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list)\n", + "- `certificates`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-certificates)\n", + "- `transactions`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list49)\n", + "- `environments`: [Documentation](https://docs.spreedly.com/reference/api/v1/#list-environments)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "spreedly_loader = SpreedlyLoader(os.environ[\"SPREEDLY_ACCESS_TOKEN\"], \"gateways_options\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using embedded DuckDB without persistence: data will be transient\n" + ] + } + ], + "source": [ + "# Create a vectorstore retriver from the loader\n", + "# see https://python.langchain.com/en/latest/modules/indexes/getting_started.html for more details\n", + "\n", + "index = VectorstoreIndexCreator().from_loaders([spreedly_loader])\n", + "spreedly_doc_retriever = index.vectorstore.as_retriever()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='installment_grace_period_duration\\nreference_data_code\\ninvoice_number\\ntax_management_indicator\\noriginal_amount\\ninvoice_amount\\nvat_tax_rate\\nmobile_remote_payment_type\\ngratuity_amount\\nmdd_field_1\\nmdd_field_2\\nmdd_field_3\\nmdd_field_4\\nmdd_field_5\\nmdd_field_6\\nmdd_field_7\\nmdd_field_8\\nmdd_field_9\\nmdd_field_10\\nmdd_field_11\\nmdd_field_12\\nmdd_field_13\\nmdd_field_14\\nmdd_field_15\\nmdd_field_16\\nmdd_field_17\\nmdd_field_18\\nmdd_field_19\\nmdd_field_20\\nsupported_countries: US\\nAE\\nBR\\nCA\\nCN\\nDK\\nFI\\nFR\\nDE\\nIN\\nJP\\nMX\\nNO\\nSE\\nGB\\nSG\\nLB\\nPK\\nsupported_cardtypes: visa\\nmaster\\namerican_express\\ndiscover\\ndiners_club\\njcb\\ndankort\\nmaestro\\nelo\\nregions: asia_pacific\\neurope\\nlatin_america\\nnorth_america\\nhomepage: http://www.cybersource.com\\ndisplay_api_url: https://ics2wsa.ic3.com/commerce/1.x/transactionProcessor\\ncompany_name: CyberSource', metadata={'source': 'https://core.spreedly.com/v1/gateways_options.json'}),\n", + " Document(page_content='BG\\nBH\\nBI\\nBJ\\nBM\\nBN\\nBO\\nBR\\nBS\\nBT\\nBW\\nBY\\nBZ\\nCA\\nCC\\nCF\\nCH\\nCK\\nCL\\nCM\\nCN\\nCO\\nCR\\nCV\\nCX\\nCY\\nCZ\\nDE\\nDJ\\nDK\\nDO\\nDZ\\nEC\\nEE\\nEG\\nEH\\nES\\nET\\nFI\\nFJ\\nFK\\nFM\\nFO\\nFR\\nGA\\nGB\\nGD\\nGE\\nGF\\nGG\\nGH\\nGI\\nGL\\nGM\\nGN\\nGP\\nGQ\\nGR\\nGT\\nGU\\nGW\\nGY\\nHK\\nHM\\nHN\\nHR\\nHT\\nHU\\nID\\nIE\\nIL\\nIM\\nIN\\nIO\\nIS\\nIT\\nJE\\nJM\\nJO\\nJP\\nKE\\nKG\\nKH\\nKI\\nKM\\nKN\\nKR\\nKW\\nKY\\nKZ\\nLA\\nLC\\nLI\\nLK\\nLS\\nLT\\nLU\\nLV\\nMA\\nMC\\nMD\\nME\\nMG\\nMH\\nMK\\nML\\nMN\\nMO\\nMP\\nMQ\\nMR\\nMS\\nMT\\nMU\\nMV\\nMW\\nMX\\nMY\\nMZ\\nNA\\nNC\\nNE\\nNF\\nNG\\nNI\\nNL\\nNO\\nNP\\nNR\\nNU\\nNZ\\nOM\\nPA\\nPE\\nPF\\nPH\\nPK\\nPL\\nPN\\nPR\\nPT\\nPW\\nPY\\nQA\\nRE\\nRO\\nRS\\nRU\\nRW\\nSA\\nSB\\nSC\\nSE\\nSG\\nSI\\nSK\\nSL\\nSM\\nSN\\nST\\nSV\\nSZ\\nTC\\nTD\\nTF\\nTG\\nTH\\nTJ\\nTK\\nTM\\nTO\\nTR\\nTT\\nTV\\nTW\\nTZ\\nUA\\nUG\\nUS\\nUY\\nUZ\\nVA\\nVC\\nVE\\nVI\\nVN\\nVU\\nWF\\nWS\\nYE\\nYT\\nZA\\nZM\\nsupported_cardtypes: visa\\nmaster\\namerican_express\\ndiscover\\njcb\\nmaestro\\nelo\\nnaranja\\ncabal\\nunionpay\\nregions: asia_pacific\\neurope\\nmiddle_east\\nnorth_america\\nhomepage: http://worldpay.com\\ndisplay_api_url: https://secure.worldpay.com/jsp/merchant/xml/paymentService.jsp\\ncompany_name: WorldPay', metadata={'source': 'https://core.spreedly.com/v1/gateways_options.json'}),\n", + " Document(page_content='gateway_specific_fields: receipt_email\\nradar_session_id\\nskip_radar_rules\\napplication_fee\\nstripe_account\\nmetadata\\nidempotency_key\\nreason\\nrefund_application_fee\\nrefund_fee_amount\\nreverse_transfer\\naccount_id\\ncustomer_id\\nvalidate\\nmake_default\\ncancellation_reason\\ncapture_method\\nconfirm\\nconfirmation_method\\ncustomer\\ndescription\\nmoto\\noff_session\\non_behalf_of\\npayment_method_types\\nreturn_email\\nreturn_url\\nsave_payment_method\\nsetup_future_usage\\nstatement_descriptor\\nstatement_descriptor_suffix\\ntransfer_amount\\ntransfer_destination\\ntransfer_group\\napplication_fee_amount\\nrequest_three_d_secure\\nerror_on_requires_action\\nnetwork_transaction_id\\nclaim_without_transaction_id\\nfulfillment_date\\nevent_type\\nmodal_challenge\\nidempotent_request\\nmerchant_reference\\ncustomer_reference\\nshipping_address_zip\\nshipping_from_zip\\nshipping_amount\\nline_items\\nsupported_countries: AE\\nAT\\nAU\\nBE\\nBG\\nBR\\nCA\\nCH\\nCY\\nCZ\\nDE\\nDK\\nEE\\nES\\nFI\\nFR\\nGB\\nGR\\nHK\\nHU\\nIE\\nIN\\nIT\\nJP\\nLT\\nLU\\nLV\\nMT\\nMX\\nMY\\nNL\\nNO\\nNZ\\nPL\\nPT\\nRO\\nSE\\nSG\\nSI\\nSK\\nUS\\nsupported_cardtypes: visa', metadata={'source': 'https://core.spreedly.com/v1/gateways_options.json'}),\n", + " Document(page_content='mdd_field_57\\nmdd_field_58\\nmdd_field_59\\nmdd_field_60\\nmdd_field_61\\nmdd_field_62\\nmdd_field_63\\nmdd_field_64\\nmdd_field_65\\nmdd_field_66\\nmdd_field_67\\nmdd_field_68\\nmdd_field_69\\nmdd_field_70\\nmdd_field_71\\nmdd_field_72\\nmdd_field_73\\nmdd_field_74\\nmdd_field_75\\nmdd_field_76\\nmdd_field_77\\nmdd_field_78\\nmdd_field_79\\nmdd_field_80\\nmdd_field_81\\nmdd_field_82\\nmdd_field_83\\nmdd_field_84\\nmdd_field_85\\nmdd_field_86\\nmdd_field_87\\nmdd_field_88\\nmdd_field_89\\nmdd_field_90\\nmdd_field_91\\nmdd_field_92\\nmdd_field_93\\nmdd_field_94\\nmdd_field_95\\nmdd_field_96\\nmdd_field_97\\nmdd_field_98\\nmdd_field_99\\nmdd_field_100\\nsupported_countries: US\\nAE\\nBR\\nCA\\nCN\\nDK\\nFI\\nFR\\nDE\\nIN\\nJP\\nMX\\nNO\\nSE\\nGB\\nSG\\nLB\\nPK\\nsupported_cardtypes: visa\\nmaster\\namerican_express\\ndiscover\\ndiners_club\\njcb\\nmaestro\\nelo\\nunion_pay\\ncartes_bancaires\\nmada\\nregions: asia_pacific\\neurope\\nlatin_america\\nnorth_america\\nhomepage: http://www.cybersource.com\\ndisplay_api_url: https://api.cybersource.com\\ncompany_name: CyberSource REST', metadata={'source': 'https://core.spreedly.com/v1/gateways_options.json'})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the retriever\n", + "spreedly_doc_retriever.get_relevant_documents(\"CRC\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index bf4023fc..c8fbdc0a 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -71,6 +71,7 @@ from langchain.document_loaders.s3_directory import S3DirectoryLoader from langchain.document_loaders.s3_file import S3FileLoader from langchain.document_loaders.sitemap import SitemapLoader from langchain.document_loaders.slack_directory import SlackDirectoryLoader +from langchain.document_loaders.spreedly import SpreedlyLoader from langchain.document_loaders.srt import SRTLoader from langchain.document_loaders.stripe import StripeLoader from langchain.document_loaders.telegram import TelegramChatLoader @@ -106,6 +107,7 @@ __all__ = [ "ApifyDatasetLoader", "ArxivLoader", "StripeLoader", + "SpreedlyLoader", "AzureBlobStorageContainerLoader", "AzureBlobStorageFileLoader", "BSHTMLLoader", diff --git a/langchain/document_loaders/spreedly.py b/langchain/document_loaders/spreedly.py new file mode 100644 index 00000000..70add6da --- /dev/null +++ b/langchain/document_loaders/spreedly.py @@ -0,0 +1,47 @@ +"""Loader that fetches data from Spreedly API.""" +import json +import urllib.request +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.utils import stringify_dict + +SPREEDLY_ENDPOINTS = { + "gateways_options": "https://core.spreedly.com/v1/gateways_options.json", + "gateways": "https://core.spreedly.com/v1/gateways.json", + "receivers_options": "https://core.spreedly.com/v1/receivers_options.json", + "receivers": "https://core.spreedly.com/v1/receivers.json", + "payment_methods": "https://core.spreedly.com/v1/payment_methods.json", + "certificates": "https://core.spreedly.com/v1/certificates.json", + "transactions": "https://core.spreedly.com/v1/transactions.json", + "environments": "https://core.spreedly.com/v1/environments.json", +} + + +class SpreedlyLoader(BaseLoader): + def __init__(self, access_token: str, resource: str) -> None: + self.access_token = access_token + self.resource = resource + self.headers = { + "Authorization": f"Bearer {self.access_token}", + "Accept": "application/json", + } + + def _make_request(self, url: str) -> List[Document]: + request = urllib.request.Request(url, headers=self.headers) + + with urllib.request.urlopen(request) as response: + json_data = json.loads(response.read().decode()) + text = stringify_dict(json_data) + metadata = {"source": url} + return [Document(page_content=text, metadata=metadata)] + + def _get_resource(self) -> List[Document]: + endpoint = SPREEDLY_ENDPOINTS.get(self.resource) + if endpoint is None: + return [] + return self._make_request(endpoint) + + def load(self) -> List[Document]: + return self._get_resource() diff --git a/tests/integration_tests/document_loaders/test_spreedly.py b/tests/integration_tests/document_loaders/test_spreedly.py new file mode 100644 index 00000000..bb498025 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_spreedly.py @@ -0,0 +1,11 @@ +from langchain.document_loaders.spreedly import SpreedlyLoader + + +def test_spreedly_loader() -> None: + """Test Spreedly Loader.""" + access_token = "" + resource = "gateways_options" + spreedly_loader = SpreedlyLoader(access_token, resource) + documents = spreedly_loader.load() + + assert len(documents) == 1