diff --git a/docs/extras/integrations/toolkits/airbyte_structured_qa.ipynb b/docs/extras/integrations/toolkits/airbyte_structured_qa.ipynb new file mode 100644 index 0000000000..e9139ee16c --- /dev/null +++ b/docs/extras/integrations/toolkits/airbyte_structured_qa.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Airbyte Question Answering\n", + "This notebook shows how to do question answering over structured data, in this case using the `AirbyteStripeLoader`.\n", + "\n", + "Vectorstores often have a hard time answering questions that requires computing, grouping and filtering structured data so the high level idea is to use a `pandas` dataframe to help with these types of questions. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Load data from Stripe using Airbyte. user the `record_handler` paramater to return a JSON from the data loader." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "\n", + "from langchain.document_loaders.airbyte import AirbyteStripeLoader\n", + "from langchain.chat_models.openai import ChatOpenAI\n", + "from langchain.agents import AgentType, create_pandas_dataframe_agent\n", + "\n", + "stream_name = \"customers\"\n", + "config = {\n", + " \"client_secret\": os.getenv(\"STRIPE_CLIENT_SECRET\"),\n", + " \"account_id\": os.getenv(\"STRIPE_ACCOUNT_D\"),\n", + " \"start_date\": \"2023-01-20T00:00:00Z\",\n", + "}\n", + "\n", + "def handle_record(record: dict, _id: str):\n", + " return record.data\n", + "\n", + "loader = AirbyteStripeLoader(\n", + " config=config,\n", + " record_handler=handle_record,\n", + " stream_name=stream_name,\n", + ")\n", + "data = loader.load()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Pass the data to `pandas` dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Pass the dataframe `df` to the `create_pandas_dataframe_agent` and invoke\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent = create_pandas_dataframe_agent(\n", + " ChatOpenAI(temperature=0, model=\"gpt-4\"),\n", + " df,\n", + " verbose=True,\n", + " agent_type=AgentType.OPENAI_FUNCTIONS,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "4. Run the agent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output = agent.run(\"How many rows are there?\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}