Harrison/stripe (#3762)

Co-authored-by: Ismail Pelaseyed <homanp@gmail.com>
This commit is contained in:
Harrison Chase 2023-04-28 20:03:21 -07:00 committed by GitHub
parent 8cf2ff0be0
commit 40f6e60e68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 160 additions and 0 deletions

View File

@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stripe\n",
"\n",
"This notebook covers how to load data from the Stripe REST API into a format that can be ingested into LangChain, along with example usage for vectorization."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"\n",
"from langchain.document_loaders import StripeLoader\n",
"from langchain.indexes import VectorstoreIndexCreator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Stripe API requires an access token, which can be found inside of the Stripe dashboard.\n",
"\n",
"This document loader also requires a `resource` option which defines what data you want to load.\n",
"\n",
"The following resources are available:\n",
"\n",
"- `balance_transactions` [Documentation](https://stripe.com/docs/api/balance_transactions/list)\n",
"- `charges` [Documentation](https://stripe.com/docs/api/charges/list)\n",
"- `customers` [Documentation](https://stripe.com/docs/api/customers/list)\n",
"- `events` [Documentation](https://stripe.com/docs/api/events/list)\n",
"- `refunds` [Documentation](https://stripe.com/docs/api/refunds/list)\n",
"- `disputes` [Documentation](https://stripe.com/docs/api/disputes/list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stripe_loader = StripeLoader(os.environ[\"STRIPE_ACCESS_TOKEN\"], \"charges\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create a vectorstore retriever from the loader\n",
"# see https://python.langchain.com/en/latest/modules/indexes/getting_started.html for more details\n",
"\n",
"index = VectorstoreIndexCreator().from_loaders([stripe_loader])\n",
"stripe_doc_retriever = index.vectorstore.as_retriever()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -69,6 +69,7 @@ from langchain.document_loaders.s3_file import S3FileLoader
from langchain.document_loaders.sitemap import SitemapLoader from langchain.document_loaders.sitemap import SitemapLoader
from langchain.document_loaders.slack_directory import SlackDirectoryLoader from langchain.document_loaders.slack_directory import SlackDirectoryLoader
from langchain.document_loaders.srt import SRTLoader from langchain.document_loaders.srt import SRTLoader
from langchain.document_loaders.stripe import StripeLoader
from langchain.document_loaders.telegram import TelegramChatLoader from langchain.document_loaders.telegram import TelegramChatLoader
from langchain.document_loaders.text import TextLoader from langchain.document_loaders.text import TextLoader
from langchain.document_loaders.twitter import TwitterTweetLoader from langchain.document_loaders.twitter import TwitterTweetLoader
@ -96,6 +97,7 @@ __all__ = [
"AirbyteJSONLoader", "AirbyteJSONLoader",
"ApifyDatasetLoader", "ApifyDatasetLoader",
"ArxivLoader", "ArxivLoader",
"StripeLoader",
"AzureBlobStorageContainerLoader", "AzureBlobStorageContainerLoader",
"AzureBlobStorageFileLoader", "AzureBlobStorageFileLoader",
"BSHTMLLoader", "BSHTMLLoader",

View File

@ -0,0 +1,59 @@
"""Loader that fetches data from Stripe"""
import json
import urllib.request
from typing import Any, List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
# Common prefix of every endpoint below; each resource name doubles as its
# URL path segment.
_STRIPE_API_BASE = "https://api.stripe.com/v1"

# Maps each supported ``resource`` name to the URL requested for it.
STRIPE_ENDPOINTS = {
    name: f"{_STRIPE_API_BASE}/{name}"
    for name in (
        "balance_transactions",
        "charges",
        "customers",
        "events",
        "refunds",
        "disputes",
    )
}
def _stringify_value(val: Any) -> str:
if isinstance(val, str):
return val
elif isinstance(val, dict):
return "\n" + _stringify_dict(val)
elif isinstance(val, list):
return "\n".join(_stringify_value(v) for v in val)
else:
return str(val)
def _stringify_dict(data: dict) -> str:
text = ""
for key, value in data.items():
text += key + ": " + _stringify_value(value) + "\n"
return text
class StripeLoader(BaseLoader):
    """Loader that fetches one Stripe endpoint and returns it as one Document.

    The ``resource`` name is looked up in ``STRIPE_ENDPOINTS``; the JSON body
    returned by that endpoint is flattened to text with ``_stringify_dict``.

    NOTE(review): exactly one request is made per ``load()`` call and no
    pagination is followed — presumably only the first page of a listing is
    captured; confirm against the Stripe list API.
    """

    def __init__(self, access_token: str, resource: str) -> None:
        """Store the credentials and the resource to load.

        Args:
            access_token: Token sent as a ``Bearer`` value in the
                ``Authorization`` header of the request.
            resource: Key into ``STRIPE_ENDPOINTS`` selecting what to load.
                It is not validated here; an unknown name makes ``load()``
                return an empty list.
        """
        self.access_token = access_token
        self.resource = resource
        self.headers = {"Authorization": f"Bearer {self.access_token}"}

    def _make_request(self, url: str) -> List[Document]:
        """Fetch ``url`` and wrap the flattened JSON body in one Document.

        Args:
            url: Endpoint to request with the stored authorization header.

        Returns:
            A one-element list whose Document holds the response text and
            ``{"source": url}`` as metadata.

        Raises:
            Whatever ``urllib.request.urlopen`` or ``json.loads`` raise;
            nothing is caught here.
        """
        request = urllib.request.Request(url, headers=self.headers)
        with urllib.request.urlopen(request) as response:
            json_data = json.loads(response.read().decode())
        # The body is fully read above, so flattening can happen after the
        # connection has been released.
        text = _stringify_dict(json_data)
        metadata = {"source": url}
        return [Document(page_content=text, metadata=metadata)]

    def _get_resource(self) -> List[Document]:
        """Resolve ``self.resource`` to its endpoint and fetch it.

        Returns:
            The result of ``_make_request`` for a known resource, or an empty
            list when the resource name is not in ``STRIPE_ENDPOINTS``.
        """
        endpoint = STRIPE_ENDPOINTS.get(self.resource)
        if endpoint is None:
            # Keep the historical "empty result" contract, but say why: a
            # misspelled resource name is otherwise indistinguishable from
            # an account with no data.
            import logging

            logging.getLogger(__name__).warning(
                "Unknown Stripe resource %r; no documents loaded. "
                "Supported resources: %s",
                self.resource,
                ", ".join(STRIPE_ENDPOINTS),
            )
            return []
        return self._make_request(endpoint)

    def load(self) -> List[Document]:
        """Load the configured Stripe resource as a list of Documents."""
        return self._get_resource()

View File

@ -0,0 +1,12 @@
from langchain.document_loaders.stripe import StripeLoader
# Arguments handed to the loader under test.
# NOTE(review): the token is empty — presumably a real Stripe key has to be
# supplied for the live request to succeed; confirm how CI provides it.
access_token = ""
resource = "charges"
def test_stripe_loader() -> None:
    """Test that the Stripe loader returns exactly one document."""
    loaded = StripeLoader(access_token, resource).load()
    assert len(loaded) == 1