From 40f6e60e68442df5af2d679dcb946d58301dc466 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Fri, 28 Apr 2023 20:03:21 -0700 Subject: [PATCH] Harrison/stripe (#3762) Co-authored-by: Ismail Pelaseyed --- .../document_loaders/examples/stripe.ipynb | 87 +++++++++++++++++++ langchain/document_loaders/__init__.py | 2 + langchain/document_loaders/stripe.py | 59 +++++++++++++ .../document_loaders/test_stripe.py | 12 +++ 4 files changed, 160 insertions(+) create mode 100644 docs/modules/indexes/document_loaders/examples/stripe.ipynb create mode 100644 langchain/document_loaders/stripe.py create mode 100644 tests/integration_tests/document_loaders/test_stripe.py diff --git a/docs/modules/indexes/document_loaders/examples/stripe.ipynb b/docs/modules/indexes/document_loaders/examples/stripe.ipynb new file mode 100644 index 00000000..2889b2f2 --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/stripe.ipynb @@ -0,0 +1,87 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stripe\n", + "\n", + "This notebook covers how to load data from the Stripe REST API into a format that can be ingested into LangChain, along with example usage for vectorization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "\n", + "from langchain.document_loaders import StripeLoader\n", + "from langchain.indexes import VectorstoreIndexCreator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Stripe API requires an access token, which can be found inside of the Stripe dashboard.\n", + "\n", + "This document loader also requires a `resource` option which defines what data you want to load.\n", + "\n", + "The following resources are available:\n", + "\n", + "`balance_transactions` [Documentation](https://stripe.com/docs/api/balance_transactions/list)\n", + "`charges` [Documentation](https://stripe.com/docs/api/charges/list)\n", + "`customers` [Documentation](https://stripe.com/docs/api/customers/list)\n", + "`events` [Documentation](https://stripe.com/docs/api/events/list)\n", + "`refunds` [Documentation](https://stripe.com/docs/api/refunds/list)\n", + "`disputes` [Documentation](https://stripe.com/docs/api/disputes/list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stripe_loader = StripeLoader(os.environ[\"STRIPE_ACCESS_TOKEN\"], \"charges\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a vectorstore retriever from the loader\n", + "# see https://python.langchain.com/en/latest/modules/indexes/getting_started.html for more details\n", + "\n", + "index = VectorstoreIndexCreator().from_loaders([stripe_loader])\n", + "stripe_doc_retriever = index.vectorstore.as_retriever()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index 47c3fc02..3bf89a2c 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -69,6 +69,7 @@ from langchain.document_loaders.s3_file import S3FileLoader from langchain.document_loaders.sitemap import SitemapLoader from langchain.document_loaders.slack_directory import SlackDirectoryLoader from langchain.document_loaders.srt import SRTLoader +from langchain.document_loaders.stripe import StripeLoader from langchain.document_loaders.telegram import TelegramChatLoader from langchain.document_loaders.text import TextLoader from langchain.document_loaders.twitter import TwitterTweetLoader @@ -96,6 +97,7 @@ __all__ = [ "AirbyteJSONLoader", "ApifyDatasetLoader", "ArxivLoader", + "StripeLoader", "AzureBlobStorageContainerLoader", "AzureBlobStorageFileLoader", "BSHTMLLoader", diff --git a/langchain/document_loaders/stripe.py b/langchain/document_loaders/stripe.py new file mode 100644 index 00000000..5e842e05 --- /dev/null +++ b/langchain/document_loaders/stripe.py @@ -0,0 +1,59 @@ +"""Loader that fetches data from Stripe""" +import json +import urllib.request +from typing import Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + +STRIPE_ENDPOINTS = { + "balance_transactions": "https://api.stripe.com/v1/balance_transactions", + "charges": "https://api.stripe.com/v1/charges", + "customers": "https://api.stripe.com/v1/customers", + "events": "https://api.stripe.com/v1/events", + "refunds": "https://api.stripe.com/v1/refunds", + "disputes": "https://api.stripe.com/v1/disputes", +} + + +def _stringify_value(val: Any) -> str: + if isinstance(val, str): + return val + elif isinstance(val, dict): + return "\n" + _stringify_dict(val) + elif 
isinstance(val, list): + return "\n".join(_stringify_value(v) for v in val) + else: + return str(val) + + +def _stringify_dict(data: dict) -> str: + text = "" + for key, value in data.items(): + text += key + ": " + _stringify_value(value) + "\n" + return text + + +class StripeLoader(BaseLoader): + def __init__(self, access_token: str, resource: str) -> None: + self.access_token = access_token + self.resource = resource + self.headers = {"Authorization": f"Bearer {self.access_token}"} + + def _make_request(self, url: str) -> List[Document]: + request = urllib.request.Request(url, headers=self.headers) + + with urllib.request.urlopen(request) as response: + json_data = json.loads(response.read().decode()) + text = _stringify_dict(json_data) + metadata = {"source": url} + return [Document(page_content=text, metadata=metadata)] + + def _get_resource(self) -> List[Document]: + endpoint = STRIPE_ENDPOINTS.get(self.resource) + if endpoint is None: + return [] + return self._make_request(endpoint) + + def load(self) -> List[Document]: + return self._get_resource() diff --git a/tests/integration_tests/document_loaders/test_stripe.py b/tests/integration_tests/document_loaders/test_stripe.py new file mode 100644 index 00000000..eb2cf47d --- /dev/null +++ b/tests/integration_tests/document_loaders/test_stripe.py @@ -0,0 +1,12 @@ +from langchain.document_loaders.stripe import StripeLoader + +access_token = "" +resource = "charges" + + +def test_stripe_loader() -> None: + """Test Stripe loader.""" + stripe_loader = StripeLoader(access_token, resource) + documents = stripe_loader.load() + + assert len(documents) == 1