From f10be072ff154a5f5afa49c41b54c07fbcb23f4b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira <34777347+rasiqueira@users.noreply.github.com> Date: Wed, 24 May 2023 15:47:01 -0300 Subject: [PATCH] Add Iugu document loader (#5162) Create IUGU loader --------- Co-authored-by: Dev 2049 --- docs/modules/indexes/document_loaders.rst | 2 + .../document_loaders/examples/iugu.ipynb | 86 +++++++++++++++++++ langchain/document_loaders/iugu.py | 41 +++++++++ 3 files changed, 129 insertions(+) create mode 100644 docs/modules/indexes/document_loaders/examples/iugu.ipynb create mode 100644 langchain/document_loaders/iugu.py diff --git a/docs/modules/indexes/document_loaders.rst b/docs/modules/indexes/document_loaders.rst index 80f47cd2..54ff07cf 100644 --- a/docs/modules/indexes/document_loaders.rst +++ b/docs/modules/indexes/document_loaders.rst @@ -40,6 +40,8 @@ For detailed instructions on how to get set up with Unstructured, see installati ./document_loaders/examples/file_directory.ipynb ./document_loaders/examples/html.ipynb ./document_loaders/examples/image.ipynb + ./document_loaders/examples/iugu.ipynb + ./document_loaders/examples/jupyter_notebook.ipynb ./document_loaders/examples/json.ipynb ./document_loaders/examples/markdown.ipynb ./document_loaders/examples/microsoft_powerpoint.ipynb diff --git a/docs/modules/indexes/document_loaders/examples/iugu.ipynb b/docs/modules/indexes/document_loaders/examples/iugu.ipynb new file mode 100644 index 00000000..5bec0c77 --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/iugu.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Iugu\n", + "\n", + ">[Iugu](https://www.iugu.com/) is a Brazilian services and software as a service (SaaS) company. 
It offers payment-processing software and application programming interfaces for e-commerce websites and mobile applications.\n", + "\n", + "This notebook covers how to load data from the `Iugu REST API` into a format that can be ingested into LangChain, along with example usage for vectorization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "\n", + "from langchain.document_loaders import IuguLoader\n", + "from langchain.indexes import VectorstoreIndexCreator" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Iugu API requires an access token, which can be found inside of the Iugu dashboard.\n", + "\n", + "This document loader also requires a `resource` option which defines what data you want to load.\n", + "\n", + "The following resources are available: `invoices`, `customers`, `charges`, `subscriptions`, and `plans`.\n", + "\n", + "See the [Iugu API documentation](https://dev.iugu.com/reference/metadados) for details.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iugu_loader = IuguLoader(\"charges\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a vectorstore retriever from the loader\n", + "# see https://python.langchain.com/en/latest/modules/indexes/getting_started.html for more details\n", + "\n", + "index = VectorstoreIndexCreator().from_loaders([iugu_loader])\n", + "iugu_doc_retriever = index.vectorstore.as_retriever()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} 
diff --git a/langchain/document_loaders/iugu.py b/langchain/document_loaders/iugu.py
new file mode 100644
index 00000000..7fd853c4
--- /dev/null
+++ b/langchain/document_loaders/iugu.py
@@ -0,0 +1,65 @@
+"""Loader that fetches data from IUGU"""
+import json
+import urllib.request
+from typing import List, Optional
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from langchain.utils import get_from_env, stringify_dict
+
+# Resource name -> IUGU REST URL fetched for that resource.
+IUGU_ENDPOINTS = {
+    "invoices": "https://api.iugu.com/v1/invoices",
+    "customers": "https://api.iugu.com/v1/customers",
+    "charges": "https://api.iugu.com/v1/charges",
+    "subscriptions": "https://api.iugu.com/v1/subscriptions",
+    "plans": "https://api.iugu.com/v1/plans",
+}
+
+
+class IuguLoader(BaseLoader):
+    """Loader that turns one IUGU REST resource into a ``Document``.
+
+    ``resource`` is looked up in ``IUGU_ENDPOINTS``; names missing from that
+    mapping make ``load`` return an empty list instead of raising.
+    """
+
+    def __init__(self, resource: str, api_token: Optional[str] = None) -> None:
+        """Remember the resource name and prepare the auth header.
+
+        Args:
+            resource: Key into ``IUGU_ENDPOINTS`` naming what to fetch.
+            api_token: IUGU API token. When falsy, it is resolved through
+                ``get_from_env("api_token", "IUGU_API_TOKEN")`` (presumably
+                the ``IUGU_API_TOKEN`` environment variable).
+        """
+        self.resource = resource
+        if not api_token:
+            api_token = get_from_env("api_token", "IUGU_API_TOKEN")
+        self.headers = {"Authorization": f"Bearer {api_token}"}
+
+    def _make_request(self, url: str) -> List[Document]:
+        """GET ``url`` and wrap the decoded JSON body in one ``Document``.
+
+        The payload is passed through ``stringify_dict`` to build the page
+        content, and ``url`` is recorded as the ``source`` metadata entry.
+        """
+        req = urllib.request.Request(url, headers=self.headers)
+
+        with urllib.request.urlopen(req) as resp:
+            payload = json.loads(resp.read().decode())
+
+        document = Document(
+            page_content=stringify_dict(payload),
+            metadata={"source": url},
+        )
+        return [document]
+
+    def _get_resource(self) -> List[Document]:
+        """Fetch the configured resource, or ``[]`` if it has no endpoint."""
+        url = IUGU_ENDPOINTS.get(self.resource)
+        return [] if url is None else self._make_request(url)
+
+    def load(self) -> List[Document]:
+        """Return the documents for the resource given at construction."""
+        return self._get_resource()