diff --git a/docs/modules/indexes/document_loaders/examples/modern_treasury.ipynb b/docs/modules/indexes/document_loaders/examples/modern_treasury.ipynb new file mode 100644 index 00000000..b9048041 --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/modern_treasury.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modern Treasury\n", + "\n", + "This notebook covers how to load data from the Modern Treasury REST API into a format that can be ingested into LangChain, along with example usage for vectorization." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "\n", + "from langchain.document_loaders import ModernTreasuryLoader\n", + "from langchain.indexes import VectorstoreIndexCreator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Modern Treasury API requires an organization ID and API key, which can be found in the Modern Treasury dashboard within developer settings.\n", + "\n", + "This document loader also requires a `resource` option which defines what data you want to load.\n", + "\n", + "The following resources are available:\n", + "\n", + "`payment_orders` [Documentation](https://docs.moderntreasury.com/reference/payment-order-object)\n", + "\n", + "`expected_payments` [Documentation](https://docs.moderntreasury.com/reference/expected-payment-object)\n", + "\n", + "`returns` [Documentation](https://docs.moderntreasury.com/reference/return-object)\n", + "\n", + "`incoming_payment_details` [Documentation](https://docs.moderntreasury.com/reference/incoming-payment-detail-object)\n", + "\n", + "`counterparties` [Documentation](https://docs.moderntreasury.com/reference/counterparty-object)\n", + "\n", + "`internal_accounts` [Documentation](https://docs.moderntreasury.com/reference/internal-account-object)\n", + "\n", + "`external_accounts` 
[Documentation](https://docs.moderntreasury.com/reference/external-account-object)\n", + "\n", + "`transactions` [Documentation](https://docs.moderntreasury.com/reference/transaction-object)\n", + "\n", + "`ledgers` [Documentation](https://docs.moderntreasury.com/reference/ledger-object)\n", + "\n", + "`ledger_accounts` [Documentation](https://docs.moderntreasury.com/reference/ledger-account-object)\n", + "\n", + "`ledger_transactions` [Documentation](https://docs.moderntreasury.com/reference/ledger-transaction-object)\n", + "\n", + "`events` [Documentation](https://docs.moderntreasury.com/reference/events)\n", + "\n", + "`invoices` [Documentation](https://docs.moderntreasury.com/reference/invoices)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "modern_treasury_loader = ModernTreasuryLoader(\"payment_orders\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a vectorstore retriever from the loader\n", + "# see https://python.langchain.com/en/latest/modules/indexes/getting_started.html for more details\n", + "\n", + "index = VectorstoreIndexCreator().from_loaders([modern_treasury_loader])\n", + "modern_treasury_doc_retriever = index.vectorstore.as_retriever()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/indexes/document_loaders/examples/stripe.ipynb b/docs/modules/indexes/document_loaders/examples/stripe.ipynb index f170fd9c..a0b0c0a4 100644 --- a/docs/modules/indexes/document_loaders/examples/stripe.ipynb 
+++ b/docs/modules/indexes/document_loaders/examples/stripe.ipynb @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "stripe_loader = StripeLoader(os.environ[\"STRIPE_ACCESS_TOKEN\"], \"charges\")" + "stripe_loader = StripeLoader(\"charges\")" ] }, { @@ -84,7 +84,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index 94c94150..00c96205 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -46,6 +46,7 @@ from langchain.document_loaders.image import UnstructuredImageLoader from langchain.document_loaders.image_captions import ImageCaptionLoader from langchain.document_loaders.imsdb import IMSDbLoader from langchain.document_loaders.markdown import UnstructuredMarkdownLoader +from langchain.document_loaders.modern_treasury import ModernTreasuryLoader from langchain.document_loaders.notebook import NotebookLoader from langchain.document_loaders.notion import NotionDirectoryLoader from langchain.document_loaders.notiondb import NotionDBLoader @@ -136,6 +137,7 @@ __all__ = [ "IFixitLoader", "IMSDbLoader", "ImageCaptionLoader", + "ModernTreasuryLoader", "NotebookLoader", "NotionDBLoader", "NotionDirectoryLoader", diff --git a/langchain/document_loaders/airbyte_json.py b/langchain/document_loaders/airbyte_json.py index 823267e6..82484344 100644 --- a/langchain/document_loaders/airbyte_json.py +++ b/langchain/document_loaders/airbyte_json.py @@ -1,27 +1,10 @@ """Loader that loads local airbyte json files.""" import json -from typing import Any, List +from typing import List from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader - - -def _stringify_value(val: Any) -> str: - if isinstance(val, str): - return val - elif isinstance(val, dict): - return "\n" + _stringify_dict(val) - 
elif isinstance(val, list): - return "\n".join(_stringify_value(v) for v in val) - else: - return str(val) - - -def _stringify_dict(data: dict) -> str: - text = "" - for key, value in data.items(): - text += key + ": " + _stringify_value(data[key]) + "\n" - return text +from langchain.utils import stringify_dict class AirbyteJSONLoader(BaseLoader): @@ -36,6 +19,6 @@ class AirbyteJSONLoader(BaseLoader): text = "" for line in open(self.file_path, "r"): data = json.loads(line)["_airbyte_data"] - text += _stringify_dict(data) + text += stringify_dict(data) metadata = {"source": self.file_path} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/figma.py b/langchain/document_loaders/figma.py index 420ef0e9..8a1a4722 100644 --- a/langchain/document_loaders/figma.py +++ b/langchain/document_loaders/figma.py @@ -5,24 +5,7 @@ from typing import Any, List from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader - - -def _stringify_value(val: Any) -> str: - if isinstance(val, str): - return val - elif isinstance(val, dict): - return "\n" + _stringify_dict(val) - elif isinstance(val, list): - return "\n".join(_stringify_value(v) for v in val) - else: - return str(val) - - -def _stringify_dict(data: dict) -> str: - text = "" - for key, value in data.items(): - text += key + ": " + _stringify_value(data[key]) + "\n" - return text +from langchain.utils import stringify_dict class FigmaFileLoader(BaseLoader): @@ -54,6 +37,6 @@ class FigmaFileLoader(BaseLoader): def load(self) -> List[Document]: """Load file""" data = self._get_figma_file() - text = _stringify_dict(data) + text = stringify_dict(data) metadata = {"source": self._construct_figma_api_url()} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/modern_treasury.py b/langchain/document_loaders/modern_treasury.py new file mode 100644 index 00000000..15cb1588 --- /dev/null +++ 
b/langchain/document_loaders/modern_treasury.py @@ -0,0 +1,61 @@ +"""Loader that fetches data from Modern Treasury""" +import json +import urllib.request +from base64 import b64encode +from typing import List, Optional + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.utils import get_from_env, stringify_value + +MODERN_TREASURY_ENDPOINTS = { + "payment_orders": "https://app.moderntreasury.com/api/payment_orders", + "expected_payments": "https://app.moderntreasury.com/api/expected_payments", + "returns": "https://app.moderntreasury.com/api/returns", + "incoming_payment_details": "https://app.moderntreasury.com/api/\ +incoming_payment_details", + "counterparties": "https://app.moderntreasury.com/api/counterparties", + "internal_accounts": "https://app.moderntreasury.com/api/internal_accounts", + "external_accounts": "https://app.moderntreasury.com/api/external_accounts", + "transactions": "https://app.moderntreasury.com/api/transactions", + "ledgers": "https://app.moderntreasury.com/api/ledgers", + "ledger_accounts": "https://app.moderntreasury.com/api/ledger_accounts", + "ledger_transactions": "https://app.moderntreasury.com/api/ledger_transactions", + "events": "https://app.moderntreasury.com/api/events", + "invoices": "https://app.moderntreasury.com/api/invoices", +} + + +class ModernTreasuryLoader(BaseLoader): + def __init__( + self, + resource: str, + organization_id: Optional[str] = None, + api_key: Optional[str] = None, + ) -> None: + self.resource = resource + organization_id = organization_id or get_from_env( + "organization_id", "MODERN_TREASURY_ORGANIZATION_ID" + ) + api_key = api_key or get_from_env("api_key", "MODERN_TREASURY_API_KEY") + credentials = f"{organization_id}:{api_key}".encode("utf-8") + basic_auth_token = b64encode(credentials).decode("utf-8") + self.headers = {"Authorization": f"Basic {basic_auth_token}"} + + def _make_request(self, url: str) -> List[Document]: + 
request = urllib.request.Request(url, headers=self.headers) + + with urllib.request.urlopen(request) as response: + json_data = json.loads(response.read().decode()) + text = stringify_value(json_data) + metadata = {"source": url} + return [Document(page_content=text, metadata=metadata)] + + def _get_resource(self) -> List[Document]: + endpoint = MODERN_TREASURY_ENDPOINTS.get(self.resource) + if endpoint is None: + return [] + return self._make_request(endpoint) + + def load(self) -> List[Document]: + return self._get_resource() diff --git a/langchain/document_loaders/stripe.py b/langchain/document_loaders/stripe.py index 5e842e05..6dbab180 100644 --- a/langchain/document_loaders/stripe.py +++ b/langchain/document_loaders/stripe.py @@ -1,10 +1,11 @@ """Loader that fetches data from Stripe""" import json import urllib.request -from typing import Any, List +from typing import List, Optional from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader +from langchain.utils import get_from_env, stringify_dict STRIPE_ENDPOINTS = { "balance_transactions": "https://api.stripe.com/v1/balance_transactions", @@ -16,36 +17,20 @@ STRIPE_ENDPOINTS = { } -def _stringify_value(val: Any) -> str: - if isinstance(val, str): - return val - elif isinstance(val, dict): - return "\n" + _stringify_dict(val) - elif isinstance(val, list): - return "\n".join(_stringify_value(v) for v in val) - else: - return str(val) - - -def _stringify_dict(data: dict) -> str: - text = "" - for key, value in data.items(): - text += key + ": " + _stringify_value(value) + "\n" - return text - - class StripeLoader(BaseLoader): - def __init__(self, access_token: str, resource: str) -> None: - self.access_token = access_token + def __init__(self, resource: str, access_token: Optional[str] = None) -> None: self.resource = resource - self.headers = {"Authorization": f"Bearer {self.access_token}"} + access_token = access_token or get_from_env( + "access_token", 
"STRIPE_ACCESS_TOKEN" + ) + self.headers = {"Authorization": f"Bearer {access_token}"} def _make_request(self, url: str) -> List[Document]: request = urllib.request.Request(url, headers=self.headers) with urllib.request.urlopen(request) as response: json_data = json.loads(response.read().decode()) - text = _stringify_dict(json_data) + text = stringify_dict(json_data) metadata = {"source": url} return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/utils.py b/langchain/utils.py index 0daf9c52..1ce2c373 100644 --- a/langchain/utils.py +++ b/langchain/utils.py @@ -9,7 +9,13 @@ def get_from_dict_or_env( """Get a value from a dictionary or an environment variable.""" if key in data and data[key]: return data[key] - elif env_key in os.environ and os.environ[env_key]: + else: + return get_from_env(key, env_key, default=default) + + +def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str: + """Get a value from an environment variable, falling back to a default.""" + if env_key in os.environ and os.environ[env_key]: return os.environ[env_key] elif default is not None: return default @@ -44,3 +50,21 @@ def xor_args(*arg_groups: Tuple[str, ...]) -> Callable: return wrapper return decorator + + +def stringify_value(val: Any) -> str: + if isinstance(val, str): + return val + elif isinstance(val, dict): + return "\n" + stringify_dict(val) + elif isinstance(val, list): + return "\n".join(stringify_value(v) for v in val) + else: + return str(val) + + +def stringify_dict(data: dict) -> str: + text = "" + for key, value in data.items(): + text += key + ": " + stringify_value(value) + "\n" + return text diff --git a/tests/integration_tests/document_loaders/test_modern_treasury.py b/tests/integration_tests/document_loaders/test_modern_treasury.py new file mode 100644 index 00000000..3ce8c711 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_modern_treasury.py @@ -0,0 +1,9 @@ +from langchain.document_loaders.modern_treasury 
import 
ModernTreasuryLoader + + +def test_modern_treasury_loader() -> None: + """Test Modern Treasury file loader.""" + modern_treasury_loader = ModernTreasuryLoader("payment_orders") + documents = modern_treasury_loader.load() + + assert len(documents) == 1 diff --git a/tests/integration_tests/document_loaders/test_stripe.py b/tests/integration_tests/document_loaders/test_stripe.py index eb2cf47d..e8484ab6 100644 --- a/tests/integration_tests/document_loaders/test_stripe.py +++ b/tests/integration_tests/document_loaders/test_stripe.py @@ -1,12 +1,9 @@ from langchain.document_loaders.stripe import StripeLoader -access_token = "" -resource = "charges" - def test_stripe_loader() -> None: - """Test Figma file loader.""" - stripe_loader = StripeLoader(access_token, resource) + """Test Stripe file loader.""" + stripe_loader = StripeLoader("charges") documents = stripe_loader.load() assert len(documents) == 1