From 82baecc89297582e81b3a4c1ff49feee03ba711f Mon Sep 17 00:00:00 2001 From: Ankush Gola <9536492+agola11@users.noreply.github.com> Date: Tue, 28 Feb 2023 19:44:39 -0800 Subject: [PATCH] Add a SQL agent for interacting with SQL Databases and JSON Agent for interacting with large JSON blobs (#1150) This PR adds * `ZeroShotAgent.as_sql_agent`, which returns an agent for interacting with a sql database. This builds off of `SQLDatabaseChain`. The main advantages are 1) answering general questions about the db, 2) access to a tool for double checking queries, and 3) recovering from errors * `ZeroShotAgent.as_json_agent` which returns an agent for interacting with json blobs. * Several examples in notebooks --------- Co-authored-by: Harrison Chase --- docs/modules/agents/agent_toolkits/json.ipynb | 190 + .../agents/agent_toolkits/openai_openapi.yml | 3124 +++++++++++++++++ .../agents/agent_toolkits/openapi.ipynb | 242 ++ .../agents/agent_toolkits/python.ipynb | 210 ++ .../agents/agent_toolkits/sql_database.ipynb | 527 +++ .../agents/agent_toolkits/vectorstore.ipynb | 417 +++ .../modules/agents/examples/async_agent.ipynb | 2 +- docs/modules/agents/how_to_guides.rst | 15 +- .../modules/agents/implementations/mrkl.ipynb | 2 +- .../self_ask_with_search.ipynb | 4 +- langchain/agents/__init__.py | 12 + langchain/agents/agent.py | 2 +- langchain/agents/agent_toolkits/__init__.py | 33 + langchain/agents/agent_toolkits/base.py | 15 + .../agents/agent_toolkits/json/__init__.py | 1 + langchain/agents/agent_toolkits/json/base.py | 43 + .../agents/agent_toolkits/json/prompt.py | 25 + .../agents/agent_toolkits/json/toolkit.py | 21 + .../agents/agent_toolkits/openapi/__init__.py | 1 + .../agents/agent_toolkits/openapi/base.py | 46 + .../agents/agent_toolkits/openapi/prompt.py | 29 + .../agents/agent_toolkits/openapi/toolkit.py | 67 + .../agents/agent_toolkits/python/__init__.py | 0 .../agents/agent_toolkits/python/base.py | 31 + .../agents/agent_toolkits/python/prompt.py | 9 + .../agents/agent_toolkits/sql/__init__.py | 1 + langchain/agents/agent_toolkits/sql/base.py | 45 + langchain/agents/agent_toolkits/sql/prompt.py | 21 + .../agents/agent_toolkits/sql/toolkit.py | 39 + .../agent_toolkits/vectorstore/__init__.py | 1 + .../agents/agent_toolkits/vectorstore/base.py | 55 + .../agent_toolkits/vectorstore/prompt.py | 13 + .../agent_toolkits/vectorstore/toolkit.py | 89 + langchain/agents/load_tools.py | 15 +- langchain/chains/api/base.py | 2 +- langchain/chains/llm_requests.py | 2 +- langchain/python.py | 12 +- langchain/requests.py | 72 +- langchain/sql_database.py | 32 +- langchain/tools/base.py | 10 +- langchain/tools/bing_search/toolkit.py | 26 - langchain/tools/google_search/toolkit.py | 26 - langchain/tools/json/__init__.py | 1 + langchain/tools/json/tool.py | 113 + langchain/tools/python/__init__.py | 0 langchain/tools/python/tool.py | 30 + langchain/tools/requests/__init__.py | 1 + langchain/tools/requests/tool.py | 137 + langchain/tools/sql_database/__init__.py | 1 + langchain/tools/sql_database/prompt.py | 14 + langchain/tools/sql_database/tool.py | 111 + langchain/tools/vectorstore/__init__.py | 1 + langchain/tools/vectorstore/tool.py | 81 + langchain/tools/wolfram_alpha/toolkit.py | 22 - tests/unit_tests/chains/test_api.py | 2 +- tests/unit_tests/test_python.py | 21 +- tests/unit_tests/tools/__init__.py | 1 + tests/unit_tests/tools/test_json.py | 49 + 58 files changed, 5994 insertions(+), 120 deletions(-) create mode 100644 docs/modules/agents/agent_toolkits/json.ipynb create mode 100644 docs/modules/agents/agent_toolkits/openai_openapi.yml create mode 100644 docs/modules/agents/agent_toolkits/openapi.ipynb create mode 100644 docs/modules/agents/agent_toolkits/python.ipynb create mode 100644 docs/modules/agents/agent_toolkits/sql_database.ipynb create mode 100644 docs/modules/agents/agent_toolkits/vectorstore.ipynb create mode 100644 langchain/agents/agent_toolkits/__init__.py create mode 100644 langchain/agents/agent_toolkits/base.py create mode 100644 langchain/agents/agent_toolkits/json/__init__.py create mode 100644 langchain/agents/agent_toolkits/json/base.py create mode 100644 langchain/agents/agent_toolkits/json/prompt.py create mode 100644 langchain/agents/agent_toolkits/json/toolkit.py create mode 100644 langchain/agents/agent_toolkits/openapi/__init__.py create mode 100644 langchain/agents/agent_toolkits/openapi/base.py create mode 100644 langchain/agents/agent_toolkits/openapi/prompt.py create mode 100644 langchain/agents/agent_toolkits/openapi/toolkit.py create mode 100644 langchain/agents/agent_toolkits/python/__init__.py create mode 100644 langchain/agents/agent_toolkits/python/base.py create mode 100644 langchain/agents/agent_toolkits/python/prompt.py create mode 100644 langchain/agents/agent_toolkits/sql/__init__.py create mode 100644 langchain/agents/agent_toolkits/sql/base.py create mode 100644 langchain/agents/agent_toolkits/sql/prompt.py create mode 100644 langchain/agents/agent_toolkits/sql/toolkit.py create mode 100644 langchain/agents/agent_toolkits/vectorstore/__init__.py create mode 100644 langchain/agents/agent_toolkits/vectorstore/base.py create mode 100644 langchain/agents/agent_toolkits/vectorstore/prompt.py create mode 100644 langchain/agents/agent_toolkits/vectorstore/toolkit.py delete mode 100644 langchain/tools/bing_search/toolkit.py delete mode 100644 langchain/tools/google_search/toolkit.py create mode 100644 langchain/tools/json/__init__.py create mode 100644 langchain/tools/json/tool.py create mode 100644 langchain/tools/python/__init__.py create mode 100644 langchain/tools/python/tool.py create mode 100644 langchain/tools/requests/__init__.py create mode 100644 langchain/tools/requests/tool.py create mode 100644 langchain/tools/sql_database/__init__.py create mode 100644 langchain/tools/sql_database/prompt.py create mode 100644 langchain/tools/sql_database/tool.py create mode 100644 langchain/tools/vectorstore/__init__.py create mode 100644 langchain/tools/vectorstore/tool.py delete mode 100644 langchain/tools/wolfram_alpha/toolkit.py create mode 100644 tests/unit_tests/tools/__init__.py create mode 100644 tests/unit_tests/tools/test_json.py diff --git a/docs/modules/agents/agent_toolkits/json.ipynb b/docs/modules/agents/agent_toolkits/json.ipynb new file mode 100644 index 00000000..9bec32e7 --- /dev/null +++ b/docs/modules/agents/agent_toolkits/json.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab", + "metadata": {}, + "source": [ + "# JSON Agent\n", + "\n", + "This notebook showcases an agent designed to interact with large JSON/dict objects. This is useful when you want to answer questions about a JSON blob that's too large to fit in the context window of an LLM. The agent is able to iteratively explore the blob to find what it needs to answer the user's question.\n", + "\n", + "In the below example, we are using the OpenAPI spec for the OpenAI API, which you can find [here](https://github.com/openai/openai-openapi/blob/master/openapi.yaml).\n", + "\n", + "We will use the JSON agent to answer some questions about the API spec." + ] + }, + { + "cell_type": "markdown", + "id": "893f90fd-f8f6-470a-a76d-1f200ba02e2f", + "metadata": {}, + "source": [ + "## Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff988466-c389-4ec6-b6ac-14364a537fd5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import yaml\n", + "\n", + "from langchain.agents import (\n", + " create_json_agent,\n", + " AgentExecutor\n", + ")\n", + "from langchain.agents.agent_toolkits import JsonToolkit\n", + "from langchain.chains import LLMChain\n", + "from langchain.llms.openai import OpenAI\n", + "from langchain.requests import RequestsWrapper\n", + "from langchain.tools.json.tool import JsonSpec" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9ecd1ba0-3937-4359-a41e-68605f0596a1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open(\"openai_openapi.yml\") as f:\n", + " data = yaml.load(f, Loader=yaml.FullLoader)\n", + "json_spec = JsonSpec(dict_=data, max_value_length=4000)\n", + "json_toolkit = JsonToolkit(spec=json_spec)\n", + "\n", + "json_agent_executor = create_json_agent(\n", + " llm=OpenAI(temperature=0),\n", + " toolkit=json_toolkit,\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "05cfcb24-4389-4b8f-ad9e-466e3fca8db0", + "metadata": {}, + "source": [ + "## Example: getting the required POST parameters for a request" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "faf13702-50f0-4d1b-b91f-48c750ccfd98", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: json_spec_list_keys\n", + "Action Input: data\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['openapi', 'info', 'servers', 'tags', 'paths', 'components', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the paths key to see what endpoints exist\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['/engines', '/engines/{engine_id}', '/completions', '/edits', '/images/generations', '/images/edits', '/images/variations', '/embeddings', '/engines/{engine_id}/search', '/files', '/files/{file_id}', '/files/{file_id}/content', '/answers', '/classifications', '/fine-tunes', '/fine-tunes/{fine_tune_id}', '/fine-tunes/{fine_tune_id}/cancel', '/fine-tunes/{fine_tune_id}/events', '/models', '/models/{model}', '/moderations']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the /completions endpoint to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['post']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the post key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['operationId', 'tags', 'summary', 'requestBody', 'responses', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the requestBody key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['required', 'content']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the required key to see what parameters are required\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"required\"]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3mTrue\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the content key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['application/json']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the application/json key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['schema']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['$ref']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the $ref key to see what parameters are required\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"][\"$ref\"]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m#/components/schemas/CreateCompletionRequest\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the CreateCompletionRequest schema to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"components\"][\"schemas\"][\"CreateCompletionRequest\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['type', 'properties', 'required']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the required key to see what parameters are required\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"components\"][\"schemas\"][\"CreateCompletionRequest\"][\"required\"]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m['model']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The required parameters in the request body to the /completions endpoint are 'model'.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"The required parameters in the request body to the /completions endpoint are 'model'.\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_agent_executor.run(\"What are the required parameters in the request body to the /completions endpoint?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba9c9d30", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/agents/agent_toolkits/openai_openapi.yml b/docs/modules/agents/agent_toolkits/openai_openapi.yml new file mode 100644 index 00000000..891fbe86 --- /dev/null +++ b/docs/modules/agents/agent_toolkits/openai_openapi.yml @@ -0,0 +1,3124 @@ +openapi: 3.0.0 +info: + title: OpenAI API + description: APIs for sampling from and fine-tuning language models + version: '1.1.0' +servers: + - url: https://api.openai.com/v1 +tags: +- name: OpenAI + description: The OpenAI REST API +paths: + /engines: + get: + operationId: listEngines + deprecated: true + tags: + - OpenAI + summary: Lists the currently available (non-finetuned) models, and provides basic information about each one such as the owner and availability. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListEnginesResponse' + x-oaiMeta: + name: List engines + group: engines + path: list + examples: + curl: | + curl https://api.openai.com/v1/engines \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Engine.list() + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.listEngines(); + response: | + { + "data": [ + { + "id": "engine-id-0", + "object": "engine", + "owner": "organization-owner", + "ready": true + }, + { + "id": "engine-id-2", + "object": "engine", + "owner": "organization-owner", + "ready": true + }, + { + "id": "engine-id-3", + "object": "engine", + "owner": "openai", + "ready": false + }, + ], + "object": "list" + } + /engines/{engine_id}: + get: + operationId: retrieveEngine + deprecated: true + tags: + - OpenAI + summary: Retrieves a model instance, providing basic information about it such as the owner and availability. + parameters: + - in: path + name: engine_id + required: true + schema: + type: string + # ideally this will be an actual ID, so this will always work from browser + example: + davinci + description: &engine_id_description > + The ID of the engine to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Engine' + x-oaiMeta: + name: Retrieve engine + group: engines + path: retrieve + examples: + curl: | + curl https://api.openai.com/v1/engines/VAR_model_id \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Engine.retrieve("VAR_model_id") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.retrieveEngine("VAR_model_id"); + response: | + { + "id": "VAR_model_id", + "object": "engine", + "owner": "openai", + "ready": true + } + /completions: + post: + operationId: createCompletion + tags: + - OpenAI + summary: Creates a completion for the provided prompt and parameters + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateCompletionRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateCompletionResponse' + x-oaiMeta: + name: Create completion + group: completions + path: create + examples: + curl: | + curl https://api.openai.com/v1/completions \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "model": "VAR_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0 + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Completion.create( + model="VAR_model_id", + prompt="Say this is a test", + max_tokens=7, + temperature=0 + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createCompletion({ + model: "VAR_model_id", + prompt: "Say this is a test", + max_tokens: 7, + temperature: 0, + }); + parameters: | + { + "model": "VAR_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0, + "top_p": 1, + "n": 1, + "stream": false, + "logprobs": null, + "stop": "\n" + } + response: | + { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "VAR_model_id", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } + } + /edits: + post: + operationId: createEdit + tags: + - OpenAI + summary: Creates a new edit for the provided input, instruction, and parameters + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEditRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEditResponse' + x-oaiMeta: + name: Create edit + group: edits + path: create + examples: + curl: | + curl https://api.openai.com/v1/edits \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "model": "VAR_model_id", + "input": "What day of the wek is it?", + "instruction": "Fix the spelling mistakes" + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Edit.create( + model="VAR_model_id", + input="What day of the wek is it?", + instruction="Fix the spelling mistakes" + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createEdit({ + model: "VAR_model_id", + input: "What day of the wek is it?", + instruction: "Fix the spelling mistakes", + }); + parameters: | + { + "model": "VAR_model_id", + "input": "What day of the wek is it?", + "instruction": "Fix the spelling mistakes", + } + response: | + { + "object": "edit", + "created": 1589478378, + "choices": [ + { + "text": "What day of the week is it?", + "index": 0, + } + ], + "usage": { + "prompt_tokens": 25, + "completion_tokens": 32, + "total_tokens": 57 + } + } + /images/generations: + post: + operationId: createImage + tags: + - OpenAI + summary: Creates an image given a prompt. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateImageRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + x-oaiMeta: + name: Create image + group: images + path: create + examples: + curl: | + curl https://api.openai.com/v1/images/generations \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "prompt": "A cute baby sea otter", + "n": 2, + "size": "1024x1024" + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Image.create( + prompt="A cute baby sea otter", + n=2, + size="1024x1024" + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createImage({ + prompt: "A cute baby sea otter", + n: 2, + size: "1024x1024", + }); + parameters: | + { + "prompt": "A cute baby sea otter", + "n": 2, + "size": "1024x1024" + } + response: | + { + "created": 1589478378, + "data": [ + { + "url": "https://..." + }, + { + "url": "https://..." + } + ] + } + /images/edits: + post: + operationId: createImageEdit + tags: + - OpenAI + summary: Creates an edited or extended image given an original image and a prompt. + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateImageEditRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + x-oaiMeta: + name: Create image edit + group: images + path: create-edit + examples: + curl: | + curl https://api.openai.com/v1/images/edits \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -F image='@otter.png' \ + -F mask='@mask.png' \ + -F prompt="A cute baby sea otter wearing a beret" \ + -F n=2 \ + -F size="1024x1024" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Image.create_edit( + image=open("otter.png", "rb"), + mask=open("mask.png", "rb"), + prompt="A cute baby sea otter wearing a beret", + n=2, + size="1024x1024" + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createImageEdit( + fs.createReadStream("otter.png"), + fs.createReadStream("mask.png"), + "A cute baby sea otter wearing a beret", + 2, + "1024x1024" + ); + response: | + { + "created": 1589478378, + "data": [ + { + "url": "https://..." + }, + { + "url": "https://..." + } + ] + } + /images/variations: + post: + operationId: createImageVariation + tags: + - OpenAI + summary: Creates a variation of a given image. + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateImageVariationRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + x-oaiMeta: + name: Create image variation + group: images + path: create-variation + examples: + curl: | + curl https://api.openai.com/v1/images/variations \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -F image='@otter.png' \ + -F n=2 \ + -F size="1024x1024" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Image.create_variation( + image=open("otter.png", "rb"), + n=2, + size="1024x1024" + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createImageVariation( + fs.createReadStream("otter.png"), + 2, + "1024x1024" + ); + response: | + { + "created": 1589478378, + "data": [ + { + "url": "https://..." + }, + { + "url": "https://..." + } + ] + } + /embeddings: + post: + operationId: createEmbedding + tags: + - OpenAI + summary: Creates an embedding vector representing the input text. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEmbeddingRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEmbeddingResponse' + x-oaiMeta: + name: Create embeddings + group: embeddings + path: create + examples: + curl: | + curl https://api.openai.com/v1/embeddings \ + -X POST \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"input": "The food was delicious and the waiter...", + "model": "text-embedding-ada-002"}' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Embedding.create( + model="text-embedding-ada-002", + input="The food was delicious and the waiter..." + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createEmbedding({ + model: "text-embedding-ada-002", + input: "The food was delicious and the waiter...", + }); + parameters: | + { + "model": "text-embedding-ada-002", + "input": "The food was delicious and the waiter..." + } + response: | + { + "object": "list", + "data": [ + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + .... (1056 floats total for ada) + -0.0028842222, + ], + "index": 0 + } + ], + "model": "text-embedding-ada-002", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + /engines/{engine_id}/search: + post: + operationId: createSearch + deprecated: true + tags: + - OpenAI + summary: | + The search endpoint computes similarity scores between provided query and documents. Documents can be passed directly to the API if there are no more than 200 of them. + To go beyond the 200 document limit, documents can be processed offline and then used for efficient retrieval at query time. When `file` is set, the search endpoint searches over all the documents in the given file and returns up to the `max_rerank` number of documents. These documents will be returned along with their search scores. + The similarity score is a positive score that usually ranges from 0 to 300 (but can sometimes go higher), where a score above 200 usually means the document is semantically similar to the query. + parameters: + - in: path + name: engine_id + required: true + schema: + type: string + example: davinci + description: The ID of the engine to use for this request. You can select one of `ada`, `babbage`, `curie`, or `davinci`. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateSearchRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateSearchResponse' + x-oaiMeta: + name: Create search + group: searches + path: create + examples: + curl: | + curl https://api.openai.com/v1/engines/davinci/search \ + -H "Content-Type: application/json" \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "documents": ["White House", "hospital", "school"], + "query": "the president" + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Engine("davinci").search( + documents=["White House", "hospital", "school"], + query="the president" + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createSearch("davinci", { + documents: ["White House", "hospital", "school"], + query: "the president", + }); + parameters: | + { + "documents": [ + "White House", + "hospital", + "school" + ], + "query": "the president" + } + response: | + { + "data": [ + { + "document": 0, + "object": "search_result", + "score": 215.412 + }, + { + "document": 1, + "object": "search_result", + "score": 40.316 + }, + { + "document": 2, + "object": "search_result", + "score": 55.226 + } + ], + "object": "list" + } + /files: + get: + operationId: listFiles + tags: + - OpenAI + summary: Returns a list of files that belong to the user's organization. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFilesResponse' + x-oaiMeta: + name: List files + group: files + path: list + examples: + curl: | + curl https://api.openai.com/v1/files \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.File.list() + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.listFiles(); + response: | + { + "data": [ + { + "id": "file-ccdDZrC3iZVNiQVeEA6Z66wf", + "object": "file", + "bytes": 175, + "created_at": 1613677385, + "filename": "train.jsonl", + "purpose": "search" + }, + { + "id": "file-XjGxS3KTG0uNmNOK362iJua3", + "object": "file", + "bytes": 140, + "created_at": 1613779121, + "filename": "puppy.jsonl", + "purpose": "search" + } + ], + "object": "list" + } + post: + operationId: createFile + tags: + - OpenAI + summary: | + Upload a file that contains document(s) to be used across various endpoints/features. Currently, the size of all the files uploaded by one organization can be up to 1 GB. Please contact us if you need to increase the storage limit. + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateFileRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFile' + x-oaiMeta: + name: Upload file + group: files + path: upload + examples: + curl: | + curl https://api.openai.com/v1/files \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -F purpose="fine-tune" \ + -F file='@mydata.jsonl' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.File.create( + file=open("mydata.jsonl", "rb"), + purpose='fine-tune' + ) + node.js: | + const fs = require("fs"); + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createFile( + fs.createReadStream("mydata.jsonl"), + "fine-tune" + ); + response: | + { + "id": "file-XjGxS3KTG0uNmNOK362iJua3", + "object": "file", + "bytes": 140, + "created_at": 1613779121, + "filename": "mydata.jsonl", + "purpose": "fine-tune" + } + /files/{file_id}: + delete: + operationId: deleteFile + tags: + - OpenAI + summary: Delete a file. + parameters: + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteFileResponse' + x-oaiMeta: + name: Delete file + group: files + path: delete + examples: + curl: | + curl https://api.openai.com/v1/files/file-XjGxS3KTG0uNmNOK362iJua3 \ + -X DELETE \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.File.delete("file-XjGxS3KTG0uNmNOK362iJua3") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.deleteFile("file-XjGxS3KTG0uNmNOK362iJua3"); + response: | + { + "id": "file-XjGxS3KTG0uNmNOK362iJua3", + "object": "file", + "deleted": true + } + get: + operationId: retrieveFile + tags: + - OpenAI + summary: Returns information about a specific file. + parameters: + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFile' + x-oaiMeta: + name: Retrieve file + group: files + path: retrieve + examples: + curl: | + curl https://api.openai.com/v1/files/file-XjGxS3KTG0uNmNOK362iJua3 \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.File.retrieve("file-XjGxS3KTG0uNmNOK362iJua3") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.retrieveFile("file-XjGxS3KTG0uNmNOK362iJua3"); + response: | + { + "id": "file-XjGxS3KTG0uNmNOK362iJua3", + "object": "file", + "bytes": 140, + "created_at": 1613779657, + "filename": "mydata.jsonl", + "purpose": "fine-tune" + } + /files/{file_id}/content: + get: + operationId: downloadFile + tags: + - OpenAI + summary: Returns the contents of the specified file + parameters: + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + type: string + x-oaiMeta: + name: Retrieve file content + group: files + path: retrieve-content + examples: + curl: | + curl https://api.openai.com/v1/files/file-XjGxS3KTG0uNmNOK362iJua3/content \ + -H 'Authorization: Bearer YOUR_API_KEY' > file.jsonl + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + content = openai.File.download("file-XjGxS3KTG0uNmNOK362iJua3") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.downloadFile("file-XjGxS3KTG0uNmNOK362iJua3"); + /answers: + post: + operationId: createAnswer + deprecated: true + tags: + - OpenAI + summary: | + Answers the specified question using the provided documents and examples. + The endpoint first [searches](/docs/api-reference/searches) over provided documents or files to find relevant context. The relevant context is combined with the provided examples and question to create the prompt for [completion](/docs/api-reference/completions). + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAnswerRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAnswerResponse' + x-oaiMeta: + name: Create answer + group: answers + path: create + examples: + curl: | + curl https://api.openai.com/v1/answers \ + -X POST \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "documents": ["Puppy A is happy.", "Puppy B is sad."], + "question": "which puppy is happy?", + "search_model": "ada", + "model": "curie", + "examples_context": "In 2017, U.S. life expectancy was 78.6 years.", + "examples": [["What is human life expectancy in the United States?","78 years."]], + "max_tokens": 5, + "stop": ["\n", "<|endoftext|>"] + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Answer.create( + search_model="ada", + model="curie", + question="which puppy is happy?", + documents=["Puppy A is happy.", "Puppy B is sad."], + examples_context="In 2017, U.S. life expectancy was 78.6 years.", + examples=[["What is human life expectancy in the United States?","78 years."]], + max_tokens=5, + stop=["\n", "<|endoftext|>"], + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createAnswer({ + search_model: "ada", + model: "curie", + question: "which puppy is happy?", + documents: ["Puppy A is happy.", "Puppy B is sad."], + examples_context: "In 2017, U.S. life expectancy was 78.6 years.", + examples: [["What is human life expectancy in the United States?","78 years."]], + max_tokens: 5, + stop: ["\n", "<|endoftext|>"], + }); + parameters: | + { + "documents": ["Puppy A is happy.", "Puppy B is sad."], + "question": "which puppy is happy?", + "search_model": "ada", + "model": "curie", + "examples_context": "In 2017, U.S. life expectancy was 78.6 years.", + "examples": [["What is human life expectancy in the United States?","78 years."]], + "max_tokens": 5, + "stop": ["\n", "<|endoftext|>"] + } + response: | + { + "answers": [ + "puppy A." + ], + "completion": "cmpl-2euVa1kmKUuLpSX600M41125Mo9NI", + "model": "curie:2020-05-03", + "object": "answer", + "search_model": "ada", + "selected_documents": [ + { + "document": 0, + "text": "Puppy A is happy. " + }, + { + "document": 1, + "text": "Puppy B is sad. " + } + ] + } + /classifications: + post: + operationId: createClassification + deprecated: true + tags: + - OpenAI + summary: | + Classifies the specified `query` using provided examples. + The endpoint first [searches](/docs/api-reference/searches) over the labeled examples + to select the ones most relevant for the particular query. Then, the relevant examples + are combined with the query to construct a prompt to produce the final label via the + [completions](/docs/api-reference/completions) endpoint. + Labeled examples can be provided via an uploaded `file`, or explicitly listed in the + request using the `examples` parameter for quick tests and small scale use cases. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateClassificationRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateClassificationResponse' + x-oaiMeta: + name: Create classification + group: classifications + path: create + examples: + curl: | + curl https://api.openai.com/v1/classifications \ + -X POST \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "examples": [ + ["A happy moment", "Positive"], + ["I am sad.", "Negative"], + ["I am feeling awesome", "Positive"]], + "query": "It is a raining day :(", + "search_model": "ada", + "model": "curie", + "labels":["Positive", "Negative", "Neutral"] + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Classification.create( + search_model="ada", + model="curie", + examples=[ + ["A happy moment", "Positive"], + ["I am sad.", "Negative"], + ["I am feeling awesome", "Positive"] + ], + query="It is a raining day :(", + labels=["Positive", "Negative", "Neutral"], + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createClassification({ + search_model: "ada", + model: "curie", + examples: [ + ["A happy moment", "Positive"], + ["I am sad.", "Negative"], + ["I am feeling awesome", "Positive"] + ], + query:"It is a raining day :(", + labels: ["Positive", "Negative", "Neutral"], + }); + parameters: | + { + "examples": [ + ["A happy moment", "Positive"], + ["I am sad.", "Negative"], + ["I am feeling awesome", "Positive"] + ], + "labels": ["Positive", "Negative", "Neutral"], + "query": "It is a raining day :(", + "search_model": "ada", + "model": "curie" + } + response: | + { + "completion": "cmpl-2euN7lUVZ0d4RKbQqRV79IiiE6M1f", + "label": "Negative", + "model": "curie:2020-05-03", + "object": "classification", + "search_model": "ada", + "selected_examples": [ + { + "document": 1, + "label": "Negative", + "text": "I am sad." + }, + { + "document": 0, + "label": "Positive", + "text": "A happy moment" + }, + { + "document": 2, + "label": "Positive", + "text": "I am feeling awesome" + } + ] + } + /fine-tunes: + post: + operationId: createFineTune + tags: + - OpenAI + summary: | + Creates a job that fine-tunes a specified model from a given dataset. + Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. + [Learn more about Fine-tuning](/docs/guides/fine-tuning) + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateFineTuneRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTune' + x-oaiMeta: + name: Create fine-tune + group: fine-tunes + path: create + beta: true + examples: + curl: | + curl https://api.openai.com/v1/fine-tunes \ + -X POST \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -d '{ + "training_file": "file-XGinujblHPwGLSztz8cPS8XY" + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.FineTune.create(training_file="file-XGinujblHPwGLSztz8cPS8XY") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createFineTune({ + training_file: "file-XGinujblHPwGLSztz8cPS8XY", + }); + response: | + { + "id": "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + "object": "fine-tune", + "model": "curie", + "created_at": 1614807352, + "events": [ + { + "object": "fine-tune-event", + "created_at": 1614807352, + "level": "info", + "message": "Job enqueued. Waiting for jobs ahead to complete. Queue number: 0." + } + ], + "fine_tuned_model": null, + "hyperparams": { + "batch_size": 4, + "learning_rate_multiplier": 0.1, + "n_epochs": 4, + "prompt_loss_weight": 0.1, + }, + "organization_id": "org-...", + "result_files": [], + "status": "pending", + "validation_files": [], + "training_files": [ + { + "id": "file-XGinujblHPwGLSztz8cPS8XY", + "object": "file", + "bytes": 1547276, + "created_at": 1610062281, + "filename": "my-data-train.jsonl", + "purpose": "fine-tune-train" + } + ], + "updated_at": 1614807352, + } + get: + operationId: listFineTunes + tags: + - OpenAI + summary: | + List your organization's fine-tuning jobs + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFineTunesResponse' + x-oaiMeta: + name: List fine-tunes + group: fine-tunes + path: list + beta: true + examples: + curl: | + curl https://api.openai.com/v1/fine-tunes \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.FineTune.list() + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.listFineTunes(); + response: | + { + "object": "list", + "data": [ + { + "id": "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + "object": "fine-tune", + "model": "curie", + "created_at": 1614807352, + "fine_tuned_model": null, + "hyperparams": { ... }, + "organization_id": "org-...", + "result_files": [], + "status": "pending", + "validation_files": [], + "training_files": [ { ... } ], + "updated_at": 1614807352, + }, + { ... }, + { ... } + ] + } + /fine-tunes/{fine_tune_id}: + get: + operationId: retrieveFineTune + tags: + - OpenAI + summary: | + Gets info about the fine-tune job. + [Learn more about Fine-tuning](/docs/guides/fine-tuning) + parameters: + - in: path + name: fine_tune_id + required: true + schema: + type: string + example: + ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tune job + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTune' + x-oaiMeta: + name: Retrieve fine-tune + group: fine-tunes + path: retrieve + beta: true + examples: + curl: | + curl https://api.openai.com/v1/fine-tunes/ft-AF1WoRqd3aJAHsqc9NY7iL8F \ + -H "Authorization: Bearer YOUR_API_KEY" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.FineTune.retrieve(id="ft-AF1WoRqd3aJAHsqc9NY7iL8F") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.retrieveFineTune("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + response: | + { + "id": "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + "object": "fine-tune", + "model": "curie", + "created_at": 1614807352, + "events": [ + { + "object": "fine-tune-event", + "created_at": 1614807352, + "level": "info", + "message": "Job enqueued. Waiting for jobs ahead to complete. Queue number: 0." + }, + { + "object": "fine-tune-event", + "created_at": 1614807356, + "level": "info", + "message": "Job started." + }, + { + "object": "fine-tune-event", + "created_at": 1614807861, + "level": "info", + "message": "Uploaded snapshot: curie:ft-acmeco-2021-03-03-21-44-20." + }, + { + "object": "fine-tune-event", + "created_at": 1614807864, + "level": "info", + "message": "Uploaded result files: file-QQm6ZpqdNwAaVC3aSz5sWwLT." + }, + { + "object": "fine-tune-event", + "created_at": 1614807864, + "level": "info", + "message": "Job succeeded." + } + ], + "fine_tuned_model": "curie:ft-acmeco-2021-03-03-21-44-20", + "hyperparams": { + "batch_size": 4, + "learning_rate_multiplier": 0.1, + "n_epochs": 4, + "prompt_loss_weight": 0.1, + }, + "organization_id": "org-...", + "result_files": [ + { + "id": "file-QQm6ZpqdNwAaVC3aSz5sWwLT", + "object": "file", + "bytes": 81509, + "created_at": 1614807863, + "filename": "compiled_results.csv", + "purpose": "fine-tune-results" + } + ], + "status": "succeeded", + "validation_files": [], + "training_files": [ + { + "id": "file-XGinujblHPwGLSztz8cPS8XY", + "object": "file", + "bytes": 1547276, + "created_at": 1610062281, + "filename": "my-data-train.jsonl", + "purpose": "fine-tune-train" + } + ], + "updated_at": 1614807865, + } + /fine-tunes/{fine_tune_id}/cancel: + post: + operationId: cancelFineTune + tags: + - OpenAI + summary: | + Immediately cancel a fine-tune job. + parameters: + - in: path + name: fine_tune_id + required: true + schema: + type: string + example: + ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tune job to cancel + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTune' + x-oaiMeta: + name: Cancel fine-tune + group: fine-tunes + path: cancel + beta: true + examples: + curl: | + curl https://api.openai.com/v1/fine-tunes/ft-AF1WoRqd3aJAHsqc9NY7iL8F/cancel \ + -X POST \ + -H "Authorization: Bearer YOUR_API_KEY" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.FineTune.cancel(id="ft-AF1WoRqd3aJAHsqc9NY7iL8F") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.cancelFineTune("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + response: | + { + "id": "ft-xhrpBbvVUzYGo8oUO1FY4nI7", + "object": "fine-tune", + "model": "curie", + "created_at": 1614807770, + "events": [ { ... } ], + "fine_tuned_model": null, + "hyperparams": { ... }, + "organization_id": "org-...", + "result_files": [], + "status": "cancelled", + "validation_files": [], + "training_files": [ + { + "id": "file-XGinujblHPwGLSztz8cPS8XY", + "object": "file", + "bytes": 1547276, + "created_at": 1610062281, + "filename": "my-data-train.jsonl", + "purpose": "fine-tune-train" + } + ], + "updated_at": 1614807789, + } + /fine-tunes/{fine_tune_id}/events: + get: + operationId: listFineTuneEvents + tags: + - OpenAI + summary: | + Get fine-grained status updates for a fine-tune job. + parameters: + - in: path + name: fine_tune_id + required: true + schema: + type: string + example: + ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tune job to get events for. + - in: query + name: stream + required: false + schema: + type: boolean + default: false + description: | + Whether to stream events for the fine-tune job. If set to true, + events will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available. The stream will terminate with a + `data: [DONE]` message when the job is finished (succeeded, cancelled, + or failed). + If set to false, only events generated so far will be returned. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFineTuneEventsResponse' + x-oaiMeta: + name: List fine-tune events + group: fine-tunes + path: events + beta: true + examples: + curl: | + curl https://api.openai.com/v1/fine-tunes/ft-AF1WoRqd3aJAHsqc9NY7iL8F/events \ + -H "Authorization: Bearer YOUR_API_KEY" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.FineTune.list_events(id="ft-AF1WoRqd3aJAHsqc9NY7iL8F") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.listFineTuneEvents("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + response: | + { + "object": "list", + "data": [ + { + "object": "fine-tune-event", + "created_at": 1614807352, + "level": "info", + "message": "Job enqueued. Waiting for jobs ahead to complete. Queue number: 0." + }, + { + "object": "fine-tune-event", + "created_at": 1614807356, + "level": "info", + "message": "Job started." + }, + { + "object": "fine-tune-event", + "created_at": 1614807861, + "level": "info", + "message": "Uploaded snapshot: curie:ft-acmeco-2021-03-03-21-44-20." + }, + { + "object": "fine-tune-event", + "created_at": 1614807864, + "level": "info", + "message": "Uploaded result files: file-QQm6ZpqdNwAaVC3aSz5sWwLT." + }, + { + "object": "fine-tune-event", + "created_at": 1614807864, + "level": "info", + "message": "Job succeeded." + } + ] + } + /models: + get: + operationId: listModels + tags: + - OpenAI + summary: Lists the currently available models, and provides basic information about each one such as the owner and availability. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListModelsResponse' + x-oaiMeta: + name: List models + group: models + path: list + examples: + curl: | + curl https://api.openai.com/v1/models \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Model.list() + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.listModels(); + response: | + { + "data": [ + { + "id": "model-id-0", + "object": "model", + "owned_by": "organization-owner", + "permission": [...] + }, + { + "id": "model-id-1", + "object": "model", + "owned_by": "organization-owner", + "permission": [...] + }, + { + "id": "model-id-2", + "object": "model", + "owned_by": "openai", + "permission": [...] + }, + ], + "object": "list" + } + /models/{model}: + get: + operationId: retrieveModel + tags: + - OpenAI + summary: Retrieves a model instance, providing basic information about the model such as the owner and permissioning. + parameters: + - in: path + name: model + required: true + schema: + type: string + # ideally this will be an actual ID, so this will always work from browser + example: + text-davinci-001 + description: + The ID of the model to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + x-oaiMeta: + name: Retrieve model + group: models + path: retrieve + examples: + curl: | + curl https://api.openai.com/v1/models/VAR_model_id \ + -H 'Authorization: Bearer YOUR_API_KEY' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Model.retrieve("VAR_model_id") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.retrieveModel("VAR_model_id"); + response: | + { + "id": "VAR_model_id", + "object": "model", + "owned_by": "openai", + "permission": [...] + } + delete: + operationId: deleteModel + tags: + - OpenAI + summary: Delete a fine-tuned model. You must have the Owner role in your organization. + parameters: + - in: path + name: model + required: true + schema: + type: string + example: curie:ft-acmeco-2021-03-03-21-44-20 + description: The model to delete + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteModelResponse' + x-oaiMeta: + name: Delete fine-tune model + group: fine-tunes + path: delete-model + beta: true + examples: + curl: | + curl https://api.openai.com/v1/models/curie:ft-acmeco-2021-03-03-21-44-20 \ + -X DELETE \ + -H "Authorization: Bearer YOUR_API_KEY" + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Model.delete("curie:ft-acmeco-2021-03-03-21-44-20") + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.deleteModel('curie:ft-acmeco-2021-03-03-21-44-20'); + response: | + { + "id": "curie:ft-acmeco-2021-03-03-21-44-20", + "object": "model", + "deleted": true + } + /moderations: + post: + operationId: createModeration + tags: + - OpenAI + summary: Classifies if text violates OpenAI's Content Policy + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateModerationRequest' + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateModerationResponse' + x-oaiMeta: + name: Create moderation + group: moderations + path: create + examples: + curl: | + curl https://api.openai.com/v1/moderations \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' \ + -d '{ + "input": "I want to kill them." + }' + python: | + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + openai.Moderation.create( + input="I want to kill them.", + ) + node.js: | + const { Configuration, OpenAIApi } = require("openai"); + const configuration = new Configuration({ + apiKey: process.env.OPENAI_API_KEY, + }); + const openai = new OpenAIApi(configuration); + const response = await openai.createModeration({ + input: "I want to kill them.", + }); + parameters: | + { + "input": "I want to kill them." + } + response: | + { + "id": "modr-5MWoLO", + "model": "text-moderation-001", + "results": [ + { + "categories": { + "hate": false, + "hate/threatening": true, + "self-harm": false, + "sexual": false, + "sexual/minors": false, + "violence": true, + "violence/graphic": false + }, + "category_scores": { + "hate": 0.22714105248451233, + "hate/threatening": 0.4132447838783264, + "self-harm": 0.005232391878962517, + "sexual": 0.01407341007143259, + "sexual/minors": 0.0038522258400917053, + "violence": 0.9223177433013916, + "violence/graphic": 0.036865197122097015 + }, + "flagged": true + } + ] + } +components: + schemas: + ListEnginesResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/Engine' + required: + - object + - data + + ListModelsResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/Model' + required: + - object + - data + + DeleteModelResponse: + type: object + properties: + id: + type: string + object: + type: string + deleted: + type: boolean + required: + - id + - object + - deleted + + CreateCompletionRequest: + type: object + properties: + model: &model_configuration + description: ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. + type: string + prompt: + description: &completions_prompt_description | + The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. + default: '<|endoftext|>' + nullable: true + oneOf: + - type: string + default: '' + example: "This is a test." + - type: array + items: + type: string + default: '' + example: "This is a test." + - type: array + minItems: 1 + items: + type: integer + example: "[1212, 318, 257, 1332, 13]" + - type: array + minItems: 1 + items: + type: array + minItems: 1 + items: + type: integer + example: "[[1212, 318, 257, 1332, 13]]" + suffix: + description: + The suffix that comes after a completion of inserted text. + default: null + nullable: true + type: string + example: "test." + max_tokens: + type: integer + minimum: 0 + default: 16 + example: 16 + nullable: true + description: &completions_max_tokens_description | + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: &completions_temperature_description | + What [sampling temperature](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277) to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + + We generally recommend altering this or `top_p` but not both. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: &completions_top_p_description | + An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + n: + type: integer + minimum: 1 + maximum: 128 + default: 1 + example: 1 + nullable: true + description: &completions_completions_description | + How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + stream: + description: > + Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. + type: boolean + nullable: true + default: false + logprobs: &completions_logprobs_configuration + type: integer + minimum: 0 + maximum: 5 + default: null + nullable: true + description: &completions_logprobs_description | + Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. If you need more than this, please contact us through our [Help center](https://help.openai.com) and describe your use case. + echo: + type: boolean + default: false + nullable: true + description: &completions_echo_description > + Echo back the prompt in addition to the completion + stop: + description: &completions_stop_description > + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + default: null + nullable: true + oneOf: + - type: string + default: <|endoftext|> + example: "\n" + nullable: true + - type: array + minItems: 1 + maxItems: 4 + items: + type: string + example: '["\n"]' + presence_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: &completions_presence_penalty_description | + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details) + frequency_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: &completions_frequency_penalty_description | + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + + [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details) + best_of: + type: integer + default: 1 + minimum: 0 + maximum: 20 + nullable: true + description: &completions_best_of_description | + Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + logit_bias: &completions_logit_bias + type: object + x-oaiTypeLabel: map + default: null + nullable: true + description: &completions_logit_bias_description | + Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. + user: &end_user_param_configuration + type: string + example: user-1234 + description: | + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). + required: + - model + + CreateCompletionResponse: + type: object + properties: + id: + type: string + object: + type: string + created: + type: integer + model: + type: string + choices: + type: array + items: + type: object + properties: + text: + type: string + index: + type: integer + logprobs: + type: object + nullable: true + properties: + tokens: + type: array + items: + type: string + token_logprobs: + type: array + items: + type: number + top_logprobs: + type: array + items: + type: object + text_offset: + type: array + items: + type: integer + finish_reason: + type: string + usage: + type: object + properties: + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + required: + - prompt_tokens + - completion_tokens + - total_tokens + required: + - id + - object + - created + - model + - choices + + CreateEditRequest: + type: object + properties: + model: *model_configuration + input: + description: + The input text to use as a starting point for the edit. + type: string + default: '' + nullable: true + example: "What day of the wek is it?" + instruction: + description: + The instruction that tells the model how to edit the prompt. + type: string + example: "Fix the spelling mistakes." + n: + type: integer + minimum: 1 + maximum: 20 + default: 1 + example: 1 + nullable: true + description: + How many edits to generate for the input and instruction. + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: *completions_temperature_description + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: *completions_top_p_description + required: + - model + - instruction + + CreateEditResponse: + type: object + properties: + object: + type: string + created: + type: integer + choices: + type: array + items: + type: object + properties: + text: + type: string + index: + type: integer + logprobs: + type: object + nullable: true + properties: + tokens: + type: array + items: + type: string + token_logprobs: + type: array + items: + type: number + top_logprobs: + type: array + items: + type: object + text_offset: + type: array + items: + type: integer + finish_reason: + type: string + usage: + type: object + properties: + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + required: + - prompt_tokens + - completion_tokens + - total_tokens + required: + - object + - created + - choices + - usage + + CreateImageRequest: + type: object + properties: + prompt: + description: A text description of the desired image(s). The maximum length is 1000 characters. + type: string + example: "A cute baby sea otter" + n: &images_n + type: integer + minimum: 1 + maximum: 10 + default: 1 + example: 1 + nullable: true + description: The number of images to generate. Must be between 1 and 10. + size: &images_size + type: string + enum: ["256x256", "512x512", "1024x1024"] + default: "1024x1024" + example: "1024x1024" + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + response_format: &images_response_format + type: string + enum: ["url", "b64_json"] + default: "url" + example: "url" + nullable: true + description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. + user: *end_user_param_configuration + required: + - prompt + + ImagesResponse: + properties: + created: + type: integer + data: + type: array + items: + type: object + properties: + url: + type: string + b64_json: + type: string + required: + - created + - data + + CreateImageEditRequest: + type: object + properties: + image: + description: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask. + type: string + format: binary + mask: + description: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. + type: string + format: binary + prompt: + description: A text description of the desired image(s). The maximum length is 1000 characters. + type: string + example: "A cute baby sea otter wearing a beret" + n: *images_n + size: *images_size + response_format: *images_response_format + user: *end_user_param_configuration + required: + - prompt + - image + + CreateImageVariationRequest: + type: object + properties: + image: + description: The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square. + type: string + format: binary + n: *images_n + size: *images_size + response_format: *images_response_format + user: *end_user_param_configuration + required: + - image + + CreateModerationRequest: + type: object + properties: + input: + description: The input text to classify + oneOf: + - type: string + default: '' + example: "I want to kill them." + - type: array + items: + type: string + default: '' + example: "I want to kill them." + model: + description: | + Two content moderations models are available: `text-moderation-stable` and `text-moderation-latest`. + The default is `text-moderation-latest` which will be automatically upgraded over time. This ensures you are always using our most accurate model. If you use `text-moderation-stable`, we will provide advanced notice before updating the model. Accuracy of `text-moderation-stable` may be slightly lower than for `text-moderation-latest`. + type: string + nullable: false + default: "text-moderation-latest" + example: "text-moderation-stable" + required: + - input + + CreateModerationResponse: + type: object + properties: + id: + type: string + model: + type: string + results: + type: array + items: + type: object + properties: + flagged: + type: boolean + categories: + type: object + properties: + hate: + type: boolean + hate/threatening: + type: boolean + self-harm: + type: boolean + sexual: + type: boolean + sexual/minors: + type: boolean + violence: + type: boolean + violence/graphic: + type: boolean + required: + - hate + - hate/threatening + - self-harm + - sexual + - sexual/minors + - violence + - violence/graphic + category_scores: + type: object + properties: + hate: + type: number + hate/threatening: + type: number + self-harm: + type: number + sexual: + type: number + sexual/minors: + type: number + violence: + type: number + violence/graphic: + type: number + required: + - hate + - hate/threatening + - self-harm + - sexual + - sexual/minors + - violence + - violence/graphic + required: + - flagged + - categories + - category_scores + required: + - id + - model + - results + + CreateSearchRequest: + type: object + properties: + query: + description: Query to search against the documents. + type: string + example: "the president" + minLength: 1 + documents: + description: | + Up to 200 documents to search over, provided as a list of strings. + The maximum document length (in tokens) is 2034 minus the number of tokens in the query. + You should specify either `documents` or a `file`, but not both. + type: array + minItems: 1 + maxItems: 200 + items: + type: string + nullable: true + example: "['White House', 'hospital', 'school']" + file: + description: | + The ID of an uploaded file that contains documents to search over. + You should specify either `documents` or a `file`, but not both. + type: string + nullable: true + max_rerank: + description: | + The maximum number of documents to be re-ranked and returned by search. + This flag only takes effect when `file` is set. + type: integer + minimum: 1 + default: 200 + nullable: true + return_metadata: &return_metadata_configuration + description: | + A special boolean flag for showing metadata. If set to `true`, each document entry in the returned JSON will contain a "metadata" field. + This flag only takes effect when `file` is set. + type: boolean + default: false + nullable: true + user: *end_user_param_configuration + required: + - query + + CreateSearchResponse: + type: object + properties: + object: + type: string + model: + type: string + data: + type: array + items: + type: object + properties: + object: + type: string + document: + type: integer + score: + type: number + + ListFilesResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/OpenAIFile' + required: + - object + - data + + CreateFileRequest: + type: object + additionalProperties: false + properties: + file: + description: | + Name of the [JSON Lines](https://jsonlines.readthedocs.io/en/latest/) file to be uploaded. + If the `purpose` is set to "fine-tune", each line is a JSON record with "prompt" and "completion" fields representing your [training examples](/docs/guides/fine-tuning/prepare-training-data). + type: string + format: binary + purpose: + description: | + The intended purpose of the uploaded documents. + Use "fine-tune" for [Fine-tuning](/docs/api-reference/fine-tunes). This allows us to validate the format of the uploaded file. + type: string + required: + - file + - purpose + + DeleteFileResponse: + type: object + properties: + id: + type: string + object: + type: string + deleted: + type: boolean + required: + - id + - object + - deleted + + CreateAnswerRequest: + type: object + additionalProperties: false + properties: + model: + description: ID of the model to use for completion. You can select one of `ada`, `babbage`, `curie`, or `davinci`. + type: string + question: + description: Question to get answered. + type: string + minLength: 1 + example: "What is the capital of Japan?" + examples: + description: List of (question, answer) pairs that will help steer the model towards the tone and answer format you'd like. We recommend adding 2 to 3 examples. + type: array + minItems: 1 + maxItems: 200 + items: + type: array + minItems: 2 + maxItems: 2 + items: + type: string + minLength: 1 + example: "[['What is the capital of Canada?', 'Ottawa'], ['Which province is Ottawa in?', 'Ontario']]" + examples_context: + description: A text snippet containing the contextual information used to generate the answers for the `examples` you provide. + type: string + example: "Ottawa, Canada's capital, is located in the east of southern Ontario, near the city of Montréal and the U.S. border." + documents: + description: | + List of documents from which the answer for the input `question` should be derived. If this is an empty list, the question will be answered based on the question-answer examples. + You should specify either `documents` or a `file`, but not both. + type: array + maxItems: 200 + items: + type: string + example: "['Japan is an island country in East Asia, located in the northwest Pacific Ocean.', 'Tokyo is the capital and most populous prefecture of Japan.']" + nullable: true + file: + description: | + The ID of an uploaded file that contains documents to search over. See [upload file](/docs/api-reference/files/upload) for how to upload a file of the desired format and purpose. + You should specify either `documents` or a `file`, but not both. + type: string + nullable: true + search_model: &search_model_configuration + description: ID of the model to use for [Search](/docs/api-reference/searches/create). You can select one of `ada`, `babbage`, `curie`, or `davinci`. + type: string + default: ada + nullable: true + max_rerank: + description: The maximum number of documents to be ranked by [Search](/docs/api-reference/searches/create) when using `file`. Setting it to a higher value leads to improved accuracy but with increased latency and cost. + type: integer + default: 200 + nullable: true + temperature: + description: What [sampling temperature](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277) to use. Higher values mean the model will take more risks and value 0 (argmax sampling) works better for scenarios with a well-defined answer. + type: number + default: 0 + nullable: true + logprobs: &context_completions_logprobs_configuration + type: integer + minimum: 0 + maximum: 5 + default: null + nullable: true + description: | + Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. + The maximum value for `logprobs` is 5. If you need more than this, please contact us through our [Help center](https://help.openai.com) and describe your use case. + When `logprobs` is set, `completion` will be automatically added into `expand` to get the logprobs. + max_tokens: + description: The maximum number of tokens allowed for the generated answer + type: integer + default: 16 + nullable: true + stop: + description: *completions_stop_description + default: null + oneOf: + - type: string + default: <|endoftext|> + example: "\n" + - type: array + minItems: 1 + maxItems: 4 + items: + type: string + example: '["\n"]' + nullable: true + n: + description: How many answers to generate for each question. + type: integer + minimum: 1 + maximum: 10 + default: 1 + nullable: true + logit_bias: *completions_logit_bias + return_metadata: *return_metadata_configuration + return_prompt: &return_prompt_configuration + description: If set to `true`, the returned JSON will include a "prompt" field containing the final prompt that was used to request a completion. This is mainly useful for debugging purposes. + type: boolean + default: false + nullable: true + expand: &expand_configuration + description: If an object name is in the list, we provide the full information of the object; otherwise, we only provide the object ID. Currently we support `completion` and `file` objects for expansion. + type: array + items: {} + nullable: true + default: [] + user: *end_user_param_configuration + required: + - model + - question + - examples + - examples_context + + CreateAnswerResponse: + type: object + properties: + object: + type: string + model: + type: string + search_model: + type: string + completion: + type: string + answers: + type: array + items: + type: string + selected_documents: + type: array + items: + type: object + properties: + document: + type: integer + text: + type: string + + CreateClassificationRequest: + type: object + additionalProperties: false + properties: + model: *model_configuration + query: + description: Query to be classified. + type: string + minLength: 1 + example: "The plot is not very attractive." + examples: + description: | + A list of examples with labels, in the following format: + `[["The movie is so interesting.", "Positive"], ["It is quite boring.", "Negative"], ...]` + All the label strings will be normalized to be capitalized. + You should specify either `examples` or `file`, but not both. + type: array + minItems: 2 + maxItems: 200 + items: + type: array + minItems: 2 + maxItems: 2 + items: + type: string + minLength: 1 + example: "[['Do not see this film.', 'Negative'], ['Smart, provocative and blisteringly funny.', 'Positive']]" + nullable: true + file: + description: | + The ID of the uploaded file that contains training examples. See [upload file](/docs/api-reference/files/upload) for how to upload a file of the desired format and purpose. + You should specify either `examples` or `file`, but not both. + type: string + nullable: true + labels: + description: The set of categories being classified. If not specified, candidate labels will be automatically collected from the examples you provide. All the label strings will be normalized to be capitalized. + type: array + minItems: 2 + maxItems: 200 + default: null + items: + type: string + example: ["Positive", "Negative"] + nullable: true + search_model: *search_model_configuration + temperature: + description: + What sampling `temperature` to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + type: number + minimum: 0 + maximum: 2 + default: 0 + nullable: true + example: 0 + logprobs: *context_completions_logprobs_configuration + max_examples: + description: The maximum number of examples to be ranked by [Search](/docs/api-reference/searches/create) when using `file`. Setting it to a higher value leads to improved accuracy but with increased latency and cost. + type: integer + default: 200 + nullable: true + logit_bias: *completions_logit_bias + return_prompt: *return_prompt_configuration + return_metadata: *return_metadata_configuration + expand: *expand_configuration + user: *end_user_param_configuration + required: + - model + - query + + CreateClassificationResponse: + type: object + properties: + object: + type: string + model: + type: string + search_model: + type: string + completion: + type: string + label: + type: string + selected_examples: + type: array + items: + type: object + properties: + document: + type: integer + text: + type: string + label: + type: string + + CreateFineTuneRequest: + type: object + properties: + training_file: + description: | + The ID of an uploaded file that contains training data. + See [upload file](/docs/api-reference/files/upload) for how to upload a file. + Your dataset must be formatted as a JSONL file, where each training + example is a JSON object with the keys "prompt" and "completion". + Additionally, you must upload your file with the purpose `fine-tune`. + See the [fine-tuning guide](/docs/guides/fine-tuning/creating-training-data) for more details. + type: string + example: "file-ajSREls59WBbvgSzJSVWxMCB" + validation_file: + description: | + The ID of an uploaded file that contains validation data. + If you provide this file, the data is used to generate validation + metrics periodically during fine-tuning. These metrics can be viewed in + the [fine-tuning results file](/docs/guides/fine-tuning/analyzing-your-fine-tuned-model). + Your train and validation data should be mutually exclusive. + Your dataset must be formatted as a JSONL file, where each validation + example is a JSON object with the keys "prompt" and "completion". + Additionally, you must upload your file with the purpose `fine-tune`. + See the [fine-tuning guide](/docs/guides/fine-tuning/creating-training-data) for more details. + type: string + nullable: true + example: "file-XjSREls59WBbvgSzJSVWxMCa" + model: + description: | + The name of the base model to fine-tune. You can select one of "ada", + "babbage", "curie", "davinci", or a fine-tuned model created after 2022-04-21. + To learn more about these models, see the + [Models](https://beta.openai.com/docs/models) documentation. + default: "curie" + type: string + nullable: true + n_epochs: + description: | + The number of epochs to train the model for. An epoch refers to one + full cycle through the training dataset. + default: 4 + type: integer + nullable: true + batch_size: + description: | + The batch size to use for training. The batch size is the number of + training examples used to train a single forward and backward pass. + By default, the batch size will be dynamically configured to be + ~0.2% of the number of examples in the training set, capped at 256 - + in general, we've found that larger batch sizes tend to work better + for larger datasets. + default: null + type: integer + nullable: true + learning_rate_multiplier: + description: | + The learning rate multiplier to use for training. + The fine-tuning learning rate is the original learning rate used for + pretraining multiplied by this value. + By default, the learning rate multiplier is the 0.05, 0.1, or 0.2 + depending on final `batch_size` (larger learning rates tend to + perform better with larger batch sizes). We recommend experimenting + with values in the range 0.02 to 0.2 to see what produces the best + results. + default: null + type: number + nullable: true + prompt_loss_weight: + description: | + The weight to use for loss on the prompt tokens. This controls how + much the model tries to learn to generate the prompt (as compared + to the completion which always has a weight of 1.0), and can add + a stabilizing effect to training when completions are short. + If prompts are extremely long (relative to completions), it may make + sense to reduce this weight so as to avoid over-prioritizing + learning the prompt. + default: 0.01 + type: number + nullable: true + compute_classification_metrics: + description: | + If set, we calculate classification-specific metrics such as accuracy + and F-1 score using the validation set at the end of every epoch. + These metrics can be viewed in the [results file](/docs/guides/fine-tuning/analyzing-your-fine-tuned-model). + In order to compute classification metrics, you must provide a + `validation_file`. Additionally, you must + specify `classification_n_classes` for multiclass classification or + `classification_positive_class` for binary classification. + type: boolean + default: false + nullable: true + classification_n_classes: + description: | + The number of classes in a classification task. + This parameter is required for multiclass classification. + type: integer + default: null + nullable: true + classification_positive_class: + description: | + The positive class in binary classification. + This parameter is needed to generate precision, recall, and F1 + metrics when doing binary classification. + type: string + default: null + nullable: true + classification_betas: + description: | + If this is provided, we calculate F-beta scores at the specified + beta values. The F-beta score is a generalization of F-1 score. + This is only used for binary classification. + With a beta of 1 (i.e. the F-1 score), precision and recall are + given the same weight. A larger beta score puts more weight on + recall and less on precision. A smaller beta score puts more weight + on precision and less on recall. + type: array + items: + type: number + example: [0.6, 1, 1.5, 2] + default: null + nullable: true + suffix: + description: | + A string of up to 40 characters that will be added to your fine-tuned model name. + For example, a `suffix` of "custom-model-name" would produce a model name like `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. + type: string + minLength: 1 + maxLength: 40 + default: null + nullable: true + required: + - training_file + + ListFineTunesResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/FineTune' + required: + - object + - data + + ListFineTuneEventsResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/FineTuneEvent' + required: + - object + - data + + CreateEmbeddingRequest: + type: object + additionalProperties: false + properties: + model: *model_configuration + input: + description: | + Input text to get embeddings for, encoded as a string or array of tokens. To get embeddings for multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed 8192 tokens in length. + example: "The quick brown fox jumped over the lazy dog" + oneOf: + - type: string + default: '' + example: "This is a test." + - type: array + items: + type: string + default: '' + example: "This is a test." + - type: array + minItems: 1 + items: + type: integer + example: "[1212, 318, 257, 1332, 13]" + - type: array + minItems: 1 + items: + type: array + minItems: 1 + items: + type: integer + example: "[[1212, 318, 257, 1332, 13]]" + user: *end_user_param_configuration + required: + - model + - input + + CreateEmbeddingResponse: + type: object + properties: + object: + type: string + model: + type: string + data: + type: array + items: + type: object + properties: + index: + type: integer + object: + type: string + embedding: + type: array + items: + type: number + required: + - index + - object + - embedding + usage: + type: object + properties: + prompt_tokens: + type: integer + total_tokens: + type: integer + required: + - prompt_tokens + - total_tokens + required: + - object + - model + - data + - usage + + Engine: + title: Engine + properties: + id: + type: string + object: + type: string + created: + type: integer + nullable: true + ready: + type: boolean + required: + - id + - object + - created + - ready + + Model: + title: Model + properties: + id: + type: string + object: + type: string + created: + type: integer + owned_by: + type: string + required: + - id + - object + - created + - owned_by + + OpenAIFile: + title: OpenAIFile + properties: + id: + type: string + object: + type: string + bytes: + type: integer + created_at: + type: integer + filename: + type: string + purpose: + type: string + status: + type: string + status_details: + type: object + nullable: true + required: + - id + - object + - bytes + - created_at + - filename + - purpose + + FineTune: + title: FineTune + properties: + id: + type: string + object: + type: string + created_at: + type: integer + updated_at: + type: integer + model: + type: string + fine_tuned_model: + type: string + nullable: true + organization_id: + type: string + status: + type: string + hyperparams: + type: object + training_files: + type: array + items: + $ref: '#/components/schemas/OpenAIFile' + validation_files: + type: array + items: + $ref: '#/components/schemas/OpenAIFile' + result_files: + type: array + items: + $ref: '#/components/schemas/OpenAIFile' + events: + type: array + items: + $ref: '#/components/schemas/FineTuneEvent' + required: + - id + - object + - created_at + - updated_at + - model + - fine_tuned_model + - organization_id + - status + - hyperparams + - training_files + - validation_files + - result_files + + FineTuneEvent: + title: FineTuneEvent + properties: + object: + type: string + created_at: + type: integer + level: + type: string + message: + type: string + required: + - object + - created_at + - level + - message + +x-oaiMeta: + groups: + - id: models + title: Models + description: | + List and describe the various models available in the API. You can refer to the [Models](/docs/models) documentation to understand what models are available and the differences between them. + - id: completions + title: Completions + description: | + Given a prompt, the model will return one or more predicted completions, and can also return the probabilities of alternative tokens at each position. + - id: edits + title: Edits + description: | + Given a prompt and an instruction, the model will return an edited version of the prompt. + - id: images + title: Images + description: | + Given a prompt and/or an input image, the model will generate a new image. + Related guide: [Image generation](/docs/guides/images) + - id: embeddings + title: Embeddings + description: | + Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + Related guide: [Embeddings](/docs/guides/embeddings) + - id: files + title: Files + description: | + Files are used to upload documents that can be used with features like [Fine-tuning](/docs/api-reference/fine-tunes). + - id: fine-tunes + title: Fine-tunes + description: | + Manage fine-tuning jobs to tailor a model to your specific training data. + Related guide: [Fine-tune models](/docs/guides/fine-tuning) + - id: moderations + title: Moderations + description: | + Given a input text, outputs if the model classifies it as violating OpenAI's content policy. + Related guide: [Moderations](/docs/guides/moderation) + - id: searches + title: Searches + warning: + title: This endpoint is deprecated and will be removed on December 3rd, 2022 + message: We’ve developed new methods with better performance. [Learn more](https://help.openai.com/en/articles/6272952-search-transition-guide). + description: | + Given a query and a set of documents or labels, the model ranks each document based on its semantic similarity to the provided query. + Related guide: [Search](/docs/guides/search) + - id: classifications + title: Classifications + warning: + title: This endpoint is deprecated and will be removed on December 3rd, 2022 + message: We’ve developed new methods with better performance. [Learn more](https://help.openai.com/en/articles/6272941-classifications-transition-guide). + description: | + Given a query and a set of labeled examples, the model will predict the most likely label for the query. Useful as a drop-in replacement for any ML classification or text-to-label task. + Related guide: [Classification](/docs/guides/classifications) + - id: answers + title: Answers + warning: + title: This endpoint is deprecated and will be removed on December 3rd, 2022 + message: We’ve developed new methods with better performance. [Learn more](https://help.openai.com/en/articles/6233728-answers-transition-guide). + description: | + Given a question, a set of documents, and some examples, the API generates an answer to the question based on the information in the set of documents. This is useful for question-answering applications on sources of truth, like company documentation or a knowledge base. + Related guide: [Question answering](/docs/guides/answers) + - id: engines + title: Engines + description: These endpoints describe and provide access to the various engines available in the API. + warning: + title: The Engines endpoints are deprecated. + message: Please use their replacement, [Models](/docs/api-reference/models), instead. [Learn more](https://help.openai.com/TODO). \ No newline at end of file diff --git a/docs/modules/agents/agent_toolkits/openapi.ipynb b/docs/modules/agents/agent_toolkits/openapi.ipynb new file mode 100644 index 00000000..f7e12df3 --- /dev/null +++ b/docs/modules/agents/agent_toolkits/openapi.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab", + "metadata": {}, + "source": [ + "# OpenAPI Agent\n", + "\n", + "This notebook showcases an agent designed to interact with an OpenAPI spec and make a correct API request based on the information it has gathered from the spec.\n", + "\n", + "In the below example, we are using the OpenAPI spec for the OpenAI API, which you can find [here](https://github.com/openai/openai-openapi/blob/master/openapi.yaml)." + ] + }, + { + "cell_type": "markdown", + "id": "893f90fd-f8f6-470a-a76d-1f200ba02e2f", + "metadata": {}, + "source": [ + "## Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff988466-c389-4ec6-b6ac-14364a537fd5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import yaml\n", + "\n", + "from langchain.agents import create_openapi_agent\n", + "from langchain.agents.agent_toolkits import OpenAPIToolkit\n", + "from langchain.llms.openai import OpenAI\n", + "from langchain.requests import RequestsWrapper\n", + "from langchain.tools.json.tool import JsonSpec" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9ecd1ba0-3937-4359-a41e-68605f0596a1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open(\"openai_openapi.yml\") as f:\n", + " data = yaml.load(f, Loader=yaml.FullLoader)\n", + "json_spec=JsonSpec(dict_=data, max_value_length=4000)\n", + "headers = {\n", + " \"Authorization\": f\"Bearer {os.getenv('OPENAI_API_KEY')}\"\n", + "}\n", + "requests_wrapper=RequestsWrapper(headers=headers)\n", + "openapi_toolkit = OpenAPIToolkit.from_llm(OpenAI(temperature=0), json_spec, requests_wrapper, verbose=True)\n", + "openapi_agent_executor = create_openapi_agent(\n", + " llm=OpenAI(temperature=0),\n", + " toolkit=openapi_toolkit,\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f111879d-ae84-41f9-ad82-d3e6b72c41ba", + "metadata": {}, + "source": [ + "## Example: agent capable of analyzing OpenAPI spec and making requests" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "548db7f7-337b-4ba8-905c-e7fd58c01799", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: json_explorer\n", + "Action Input: What is the base url for the API?\u001b[0m\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: json_spec_list_keys\n", + "Action Input: data\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['openapi', 'info', 'servers', 'tags', 'paths', 'components', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the servers key to see what the base url is\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"servers\"][0]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mValueError('Value at path `data[\"servers\"][0]` is not a dict, get the value directly.')\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should get the value of the servers key\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"servers\"][0]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m{'url': 'https://api.openai.com/v1'}\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the base url for the API\n", + "Final Answer: The base url for the API is https://api.openai.com/v1\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "Observation: \u001b[33;1m\u001b[1;3mThe base url for the API is https://api.openai.com/v1\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should find the path for the /completions endpoint.\n", + "Action: json_explorer\n", + "Action Input: What is the path for the /completions endpoint?\u001b[0m\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: json_spec_list_keys\n", + "Action Input: data\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['openapi', 'info', 'servers', 'tags', 'paths', 'components', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the paths key to see what endpoints exist\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['/engines', '/engines/{engine_id}', '/completions', '/edits', '/images/generations', '/images/edits', '/images/variations', '/embeddings', '/engines/{engine_id}/search', '/files', '/files/{file_id}', '/files/{file_id}/content', '/answers', '/classifications', '/fine-tunes', '/fine-tunes/{fine_tune_id}', '/fine-tunes/{fine_tune_id}/cancel', '/fine-tunes/{fine_tune_id}/events', '/models', '/models/{model}', '/moderations']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the path for the /completions endpoint\n", + "Final Answer: data[\"paths\"][2]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "Observation: \u001b[33;1m\u001b[1;3mdata[\"paths\"][2]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should find the required parameters for the POST request.\n", + "Action: json_explorer\n", + "Action Input: What are the required parameters for a POST request to the /completions endpoint?\u001b[0m\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: json_spec_list_keys\n", + "Action Input: data\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['openapi', 'info', 'servers', 'tags', 'paths', 'components', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the paths key to see what endpoints exist\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['/engines', '/engines/{engine_id}', '/completions', '/edits', '/images/generations', '/images/edits', '/images/variations', '/embeddings', '/engines/{engine_id}/search', '/files', '/files/{file_id}', '/files/{file_id}/content', '/answers', '/classifications', '/fine-tunes', '/fine-tunes/{fine_tune_id}', '/fine-tunes/{fine_tune_id}/cancel', '/fine-tunes/{fine_tune_id}/events', '/models', '/models/{model}', '/moderations']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the /completions endpoint to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['post']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the post key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['operationId', 'tags', 'summary', 'requestBody', 'responses', 'x-oaiMeta']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the requestBody key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['required', 'content']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the content key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['application/json']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the application/json key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['schema']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['$ref']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the $ref key to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"][\"$ref\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mValueError('Value at path `data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"][\"$ref\"]` is not a dict, get the value directly.')\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the $ref key to get the value directly\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"paths\"][\"/completions\"][\"post\"][\"requestBody\"][\"content\"][\"application/json\"][\"schema\"][\"$ref\"]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m#/components/schemas/CreateCompletionRequest\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the CreateCompletionRequest schema to see what parameters are required\n", + "Action: json_spec_list_keys\n", + "Action Input: data[\"components\"][\"schemas\"][\"CreateCompletionRequest\"]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m['type', 'properties', 'required']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the required key to see what parameters are required\n", + "Action: json_spec_get_value\n", + "Action Input: data[\"components\"][\"schemas\"][\"CreateCompletionRequest\"][\"required\"]\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m['model']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The required parameters for a POST request to the /completions endpoint are 'model'.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "Observation: \u001b[33;1m\u001b[1;3mThe required parameters for a POST request to the /completions endpoint are 'model'.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the parameters needed to make the request.\n", + "Action: requests_post\n", + "Action Input: { \"url\": \"https://api.openai.com/v1/completions\", \"data\": { \"model\": \"davinci\", \"prompt\": \"tell me a joke\" } }\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m{\"id\":\"cmpl-6oeEcNETfq8TOuIUQvAct6NrBXihs\",\"object\":\"text_completion\",\"created\":1677529082,\"model\":\"davinci\",\"choices\":[{\"text\":\"\\n\\n\\n\\nLove is a battlefield\\n\\n\\n\\nIt's me...And some\",\"index\":0,\"logprobs\":null,\"finish_reason\":\"length\"}],\"usage\":{\"prompt_tokens\":4,\"completion_tokens\":16,\"total_tokens\":20}}\n", + "\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Final Answer: Love is a battlefield. It's me...And some.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"Love is a battlefield. It's me...And some.\"" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openapi_agent_executor.run(\"Make a post request to openai /completions. The prompt should be 'tell me a joke.'\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ec9582b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/agents/agent_toolkits/python.ipynb b/docs/modules/agents/agent_toolkits/python.ipynb new file mode 100644 index 00000000..f6aa7bbd --- /dev/null +++ b/docs/modules/agents/agent_toolkits/python.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82a4c2cc-20ea-4b20-a565-63e905dee8ff", + "metadata": {}, + "source": [ + "## Python Agent\n", + "\n", + "This notebook showcases an agent designed to write and execute python code to answer a question." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f98e9c90-5c37-4fb9-af3e-d09693af8543", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.agents.agent_toolkits import create_python_agent\n", + "from langchain.tools.python.tool import PythonREPLTool\n", + "from langchain.python import PythonREPL\n", + "from langchain.llms.openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cc422f53-c51c-4694-a834-72ecd1e68363", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent_executor = create_python_agent(\n", + " llm=OpenAI(temperature=0, max_tokens=1000),\n", + " tool=PythonREPLTool(),\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "25cd4f92-ea9b-4fe6-9838-a4f85f81eebe", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to calculate the 10th fibonacci number\n", + "Action: Python REPL\n", + "Action Input: def fibonacci(n):\n", + " if n == 0:\n", + " return 0\n", + " elif n == 1:\n", + " return 1\n", + " else:\n", + " return fibonacci(n-1) + fibonacci(n-2)\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to call the function with 10 as the argument\n", + "Action: Python REPL\n", + "Action Input: fibonacci(10)\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: 55\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'55'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What is the 10th fibonacci number?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4b9f60e7-eb6a-4f14-8604-498d863d4482", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to write a neural network in PyTorch and train it on the given data.\n", + "Action: Python REPL\n", + "Action Input: \n", + "import torch\n", + "\n", + "# Define the model\n", + "model = torch.nn.Sequential(\n", + " torch.nn.Linear(1, 1)\n", + ")\n", + "\n", + "# Define the loss\n", + "loss_fn = torch.nn.MSELoss()\n", + "\n", + "# Define the optimizer\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n", + "\n", + "# Define the data\n", + "x_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])\n", + "y_data = torch.tensor([[2.0], [4.0], [6.0], [8.0]])\n", + "\n", + "# Train the model\n", + "for epoch in range(1000):\n", + " # Forward pass\n", + " y_pred = model(x_data)\n", + "\n", + " # Compute and print loss\n", + " loss = loss_fn(y_pred, y_data)\n", + " if (epoch+1) % 100 == 0:\n", + " print(f'Epoch {epoch+1}: loss = {loss.item():.4f}')\n", + "\n", + " # Zero the gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # Backward pass\n", + " loss.backward()\n", + "\n", + " # Update the weights\n", + " optimizer.step()\n", + "\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mEpoch 100: loss = 0.0013\n", + "Epoch 200: loss = 0.0007\n", + "Epoch 300: loss = 0.0004\n", + "Epoch 400: loss = 0.0002\n", + "Epoch 500: loss = 0.0001\n", + "Epoch 600: loss = 0.0001\n", + "Epoch 700: loss = 0.0000\n", + "Epoch 800: loss = 0.0000\n", + "Epoch 900: loss = 0.0000\n", + "Epoch 1000: loss = 0.0000\n", + "\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The prediction for x = 5 is 10.0.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The prediction for x = 5 is 10.0.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"\"\"Understand, write a single neuron neural network in PyTorch.\n", + "Take synthetic data for y=2x. Train for 1000 epochs and print every 100 epochs.\n", + "Return prediction for x = 5\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb654671", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/agents/agent_toolkits/sql_database.ipynb b/docs/modules/agents/agent_toolkits/sql_database.ipynb new file mode 100644 index 00000000..f9cb0895 --- /dev/null +++ b/docs/modules/agents/agent_toolkits/sql_database.ipynb @@ -0,0 +1,527 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0e499e90-7a6d-4fab-8aab-31a4df417601", + "metadata": {}, + "source": [ + "# SQL Database Agent\n", + "\n", + "This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://langchain.readthedocs.io/en/latest/modules/chains/examples/sqlite.html) and is designed to answer more general questions about a database, as well as recover from errors.\n", + "\n", + "Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your database given certain questions. Be careful running it on sensitive data!\n", + "\n", + "This uses the example Chinook database. To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the .db file in a notebooks folder at the root of this repository." + ] + }, + { + "cell_type": "markdown", + "id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c", + "metadata": { + "tags": [] + }, + "source": [ + "## Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "53422913-967b-4f2a-8022-00269c1be1b1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.agents import create_sql_agent\n", + "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n", + "from langchain.sql_database import SQLDatabase\n", + "from langchain.llms.openai import OpenAI\n", + "from langchain.agents import AgentExecutor" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "090f3699-79c6-4ce1-ab96-a94f0121fd64", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n", + "toolkit = SQLDatabaseToolkit(db=db)\n", + "\n", + "agent_executor = create_sql_agent(\n", + " llm=OpenAI(temperature=0),\n", + " toolkit=toolkit,\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b", + "metadata": {}, + "source": [ + "## Example: describing a table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mArtist, Invoice, Playlist, Genre, Album, PlaylistTrack, Track, InvoiceLine, MediaType, Employee, Customer\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the playlisttrack table\n", + "Action: schema_sql_db\n", + "Action Input: \"PlaylistTrack\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m\n", + "CREATE TABLE \"PlaylistTrack\" (\n", + "\t\"PlaylistId\" INTEGER NOT NULL, \n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\tPRIMARY KEY (\"PlaylistId\", \"TrackId\"), \n", + "\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n", + "\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'PlaylistTrack' LIMIT 3;\n", + "PlaylistId TrackId\n", + "1 3402\n", + "1 3389\n", + "1 3390\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The PlaylistTrack table has two columns, PlaylistId and TrackId, and is linked to the Playlist and Track tables.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The PlaylistTrack table has two columns, PlaylistId and TrackId, and is linked to the Playlist and Track tables.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"Describe the playlisttrack table\")" + ] + }, + { + "cell_type": "markdown", + "id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681", + "metadata": {}, + "source": [ + "## Example: describing a table, recovering from an error\n", + "\n", + "In this example, the agent tries to search for a table that doesn't exist, but finds the next best result" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bea76658-a65b-47e2-b294-6d52c5556246", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mGenre, PlaylistTrack, MediaType, Invoice, InvoiceLine, Track, Playlist, Customer, Album, Employee, Artist\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the PlaylistSong table\n", + "Action: schema_sql_db\n", + "Action Input: \"PlaylistSong\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3mError: table_names {'PlaylistSong'} not found in database\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should check the spelling of the table\n", + "Action: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mGenre, PlaylistTrack, MediaType, Invoice, InvoiceLine, Track, Playlist, Customer, Album, Employee, Artist\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m The table is called PlaylistTrack\n", + "Action: schema_sql_db\n", + "Action Input: \"PlaylistTrack\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m\n", + "CREATE TABLE \"PlaylistTrack\" (\n", + "\t\"PlaylistId\" INTEGER NOT NULL, \n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\tPRIMARY KEY (\"PlaylistId\", \"TrackId\"), \n", + "\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n", + "\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'PlaylistTrack' LIMIT 3;\n", + "PlaylistId TrackId\n", + "1 3402\n", + "1 3389\n", + "1 3390\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The PlaylistTrack table contains two columns, PlaylistId and TrackId, which are both integers and are used to link Playlist and Track tables.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The PlaylistTrack table contains two columns, PlaylistId and TrackId, which are both integers and are used to link Playlist and Track tables.'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"Describe the playlistsong table\")" + ] + }, + { + "cell_type": "markdown", + "id": "6fbc26af-97e4-4a21-82aa-48bdc992da26", + "metadata": {}, + "source": [ + "## Example: running queries" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "17bea710-4a23-4de0-b48e-21d57be48293", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mInvoice, MediaType, Artist, InvoiceLine, Genre, Playlist, Employee, Album, PlaylistTrack, Track, Customer\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the relevant tables to see what columns I can use.\n", + "Action: schema_sql_db\n", + "Action Input: \"Invoice, Customer\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m\n", + "CREATE TABLE \"Customer\" (\n", + "\t\"CustomerId\" INTEGER NOT NULL, \n", + "\t\"FirstName\" NVARCHAR(40) NOT NULL, \n", + "\t\"LastName\" NVARCHAR(20) NOT NULL, \n", + "\t\"Company\" NVARCHAR(80), \n", + "\t\"Address\" NVARCHAR(70), \n", + "\t\"City\" NVARCHAR(40), \n", + "\t\"State\" NVARCHAR(40), \n", + "\t\"Country\" NVARCHAR(40), \n", + "\t\"PostalCode\" NVARCHAR(10), \n", + "\t\"Phone\" NVARCHAR(24), \n", + "\t\"Fax\" NVARCHAR(24), \n", + "\t\"Email\" NVARCHAR(60) NOT NULL, \n", + "\t\"SupportRepId\" INTEGER, \n", + "\tPRIMARY KEY (\"CustomerId\"), \n", + "\tFOREIGN KEY(\"SupportRepId\") REFERENCES \"Employee\" (\"EmployeeId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Customer' LIMIT 3;\n", + "CustomerId FirstName LastName Company Address City State Country PostalCode Phone Fax Email SupportRepId\n", + "1 Luís Gonçalves Embraer - Empresa Brasileira de Aeronáutica S.A. Av. Brigadeiro Faria Lima, 2170 São José dos Campos SP Brazil 12227-000 +55 (12) 3923-5555 +55 (12) 3923-5566 luisg@embraer.com.br 3\n", + "2 Leonie Köhler None Theodor-Heuss-Straße 34 Stuttgart None Germany 70174 +49 0711 2842222 None leonekohler@surfeu.de 5\n", + "3 François Tremblay None 1498 rue Bélanger Montréal QC Canada H2G 1A7 +1 (514) 721-4711 None ftremblay@gmail.com 3\n", + "\n", + "\n", + "CREATE TABLE \"Invoice\" (\n", + "\t\"InvoiceId\" INTEGER NOT NULL, \n", + "\t\"CustomerId\" INTEGER NOT NULL, \n", + "\t\"InvoiceDate\" DATETIME NOT NULL, \n", + "\t\"BillingAddress\" NVARCHAR(70), \n", + "\t\"BillingCity\" NVARCHAR(40), \n", + "\t\"BillingState\" NVARCHAR(40), \n", + "\t\"BillingCountry\" NVARCHAR(40), \n", + "\t\"BillingPostalCode\" NVARCHAR(10), \n", + "\t\"Total\" NUMERIC(10, 2) NOT NULL, \n", + "\tPRIMARY KEY (\"InvoiceId\"), \n", + "\tFOREIGN KEY(\"CustomerId\") REFERENCES \"Customer\" (\"CustomerId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Invoice' LIMIT 3;\n", + "InvoiceId CustomerId InvoiceDate BillingAddress BillingCity BillingState BillingCountry BillingPostalCode Total\n", + "1 2 2009-01-01 00:00:00 Theodor-Heuss-Straße 34 Stuttgart None Germany 70174 1.98\n", + "2 4 2009-01-02 00:00:00 Ullevålsveien 14 Oslo None Norway 0171 3.96\n", + "3 8 2009-01-03 00:00:00 Grétrystraat 63 Brussels None Belgium 1000 5.94\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should query the Invoice and Customer tables to get the total sales per country.\n", + "Action: query_sql_db\n", + "Action Input: SELECT c.Country, SUM(i.Total) AS TotalSales FROM Invoice i INNER JOIN Customer c ON i.CustomerId = c.CustomerId GROUP BY c.Country ORDER BY TotalSales DESC LIMIT 10\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m[('USA', 523.0600000000003), ('Canada', 303.9599999999999), ('France', 195.09999999999994), ('Brazil', 190.09999999999997), ('Germany', 156.48), ('United Kingdom', 112.85999999999999), ('Czech Republic', 90.24000000000001), ('Portugal', 77.23999999999998), ('India', 75.25999999999999), ('Chile', 46.62)]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The customers from the USA spent the most, with a total of $523.06.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The customers from the USA spent the most, with a total of $523.06.'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"List the total sales per country. Which country's customers spent the most?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "474dddda-c067-4eeb-98b1-e763ee78b18c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mInvoice, MediaType, Artist, InvoiceLine, Genre, Playlist, Employee, Album, PlaylistTrack, Track, Customer\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the Playlist and PlaylistTrack tables to see what columns I can use.\n", + "Action: schema_sql_db\n", + "Action Input: \"Playlist, PlaylistTrack\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m\n", + "CREATE TABLE \"Playlist\" (\n", + "\t\"PlaylistId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(120), \n", + "\tPRIMARY KEY (\"PlaylistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Playlist' LIMIT 3;\n", + "PlaylistId Name\n", + "1 Music\n", + "2 Movies\n", + "3 TV Shows\n", + "\n", + "\n", + "CREATE TABLE \"PlaylistTrack\" (\n", + "\t\"PlaylistId\" INTEGER NOT NULL, \n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\tPRIMARY KEY (\"PlaylistId\", \"TrackId\"), \n", + "\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n", + "\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'PlaylistTrack' LIMIT 3;\n", + "PlaylistId TrackId\n", + "1 3402\n", + "1 3389\n", + "1 3390\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I can use a SELECT statement to get the total number of tracks in each playlist.\n", + "Action: query_checker_sql_db\n", + "Action Input: SELECT Playlist.Name, COUNT(PlaylistTrack.TrackId) AS TotalTracks FROM Playlist INNER JOIN PlaylistTrack ON Playlist.PlaylistId = PlaylistTrack.PlaylistId GROUP BY Playlist.Name\u001b[0m\n", + "Observation: \u001b[31;1m\u001b[1;3m\n", + "\n", + "SELECT Playlist.Name, COUNT(PlaylistTrack.TrackId) AS TotalTracks FROM Playlist INNER JOIN PlaylistTrack ON Playlist.PlaylistId = PlaylistTrack.PlaylistId GROUP BY Playlist.Name\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m The query looks correct, I can now execute it.\n", + "Action: query_sql_db\n", + "Action Input: SELECT Playlist.Name, COUNT(PlaylistTrack.TrackId) AS TotalTracks FROM Playlist INNER JOIN PlaylistTrack ON Playlist.PlaylistId = PlaylistTrack.PlaylistId GROUP BY Playlist.Name LIMIT 10\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m[('90’s Music', 1477), ('Brazilian Music', 39), ('Classical', 75), ('Classical 101 - Deep Cuts', 25), ('Classical 101 - Next Steps', 25), ('Classical 101 - The Basics', 25), ('Grunge', 15), ('Heavy Metal Classic', 26), ('Music', 6580), ('Music Videos', 1)]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Final Answer: The total number of tracks in each playlist are: '90’s Music' (1477), 'Brazilian Music' (39), 'Classical' (75), 'Classical 101 - Deep Cuts' (25), 'Classical 101 - Next Steps' (25), 'Classical 101 - The Basics' (25), 'Grunge' (15), 'Heavy Metal Classic' (26), 'Music' (6580), 'Music Videos' (1).\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"The total number of tracks in each playlist are: '90’s Music' (1477), 'Brazilian Music' (39), 'Classical' (75), 'Classical 101 - Deep Cuts' (25), 'Classical 101 - Next Steps' (25), 'Classical 101 - The Basics' (25), 'Grunge' (15), 'Heavy Metal Classic' (26), 'Music' (6580), 'Music Videos' (1).\"" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"Show the total number of tracks in each playlist. The Playlist name should be included in the result.\")" + ] + }, + { + "cell_type": "markdown", + "id": "7c7503b5-d9d9-4faa-b064-29fcdb5ff213", + "metadata": {}, + "source": [ + "## Recovering from an error\n", + "\n", + "In this example, the agent is able to recover from an error after initially trying to access an attribute (`Track.ArtistId`) which doesn't exist." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9fe4901e-f9e1-4022-b6bc-80e2b2d6a3a4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", + "Action Input: \"\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mMediaType, Track, Invoice, Album, Playlist, Customer, Employee, InvoiceLine, PlaylistTrack, Genre, Artist\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the Artist, InvoiceLine, and Track tables to see what columns I can use.\n", + "Action: schema_sql_db\n", + "Action Input: \"Artist, InvoiceLine, Track\"\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m\n", + "CREATE TABLE \"Artist\" (\n", + "\t\"ArtistId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(120), \n", + "\tPRIMARY KEY (\"ArtistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Artist' LIMIT 3;\n", + "ArtistId Name\n", + "1 AC/DC\n", + "2 Accept\n", + "3 Aerosmith\n", + "\n", + "\n", + "CREATE TABLE \"Track\" (\n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(200) NOT NULL, \n", + "\t\"AlbumId\" INTEGER, \n", + "\t\"MediaTypeId\" INTEGER NOT NULL, \n", + "\t\"GenreId\" INTEGER, \n", + "\t\"Composer\" NVARCHAR(220), \n", + "\t\"Milliseconds\" INTEGER NOT NULL, \n", + "\t\"Bytes\" INTEGER, \n", + "\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n", + "\tPRIMARY KEY (\"TrackId\"), \n", + "\tFOREIGN KEY(\"MediaTypeId\") REFERENCES \"MediaType\" (\"MediaTypeId\"), \n", + "\tFOREIGN KEY(\"GenreId\") REFERENCES \"Genre\" (\"GenreId\"), \n", + "\tFOREIGN KEY(\"AlbumId\") REFERENCES \"Album\" (\"AlbumId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Track' LIMIT 3;\n", + "TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice\n", + "1 For Those About To Rock (We Salute You) 1 1 1 Angus Young, Malcolm Young, Brian Johnson 343719 11170334 0.99\n", + "2 Balls to the Wall 2 2 1 None 342562 5510424 0.99\n", + "3 Fast As a Shark 3 2 1 F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman 230619 3990994 0.99\n", + "\n", + "\n", + "CREATE TABLE \"InvoiceLine\" (\n", + "\t\"InvoiceLineId\" INTEGER NOT NULL, \n", + "\t\"InvoiceId\" INTEGER NOT NULL, \n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n", + "\t\"Quantity\" INTEGER NOT NULL, \n", + "\tPRIMARY KEY (\"InvoiceLineId\"), \n", + "\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n", + "\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n", + ")\n", + "\n", + "SELECT * FROM 'InvoiceLine' LIMIT 3;\n", + "InvoiceLineId InvoiceId TrackId UnitPrice Quantity\n", + "1 1 2 0.99 1\n", + "2 1 4 0.99 1\n", + "3 2 6 0.99 1\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should query the database to get the top 3 best selling artists.\n", + "Action: query_sql_db\n", + "Action Input: SELECT Artist.Name, SUM(InvoiceLine.Quantity) AS TotalQuantity FROM Artist INNER JOIN Track ON Artist.ArtistId = Track.ArtistId INNER JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY TotalQuantity DESC LIMIT 3\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mError: (sqlite3.OperationalError) no such column: Track.ArtistId\n", + "[SQL: SELECT Artist.Name, SUM(InvoiceLine.Quantity) AS TotalQuantity FROM Artist INNER JOIN Track ON Artist.ArtistId = Track.ArtistId INNER JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY TotalQuantity DESC LIMIT 3]\n", + "(Background on this error at: https://sqlalche.me/e/14/e3q8)\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I should double check my query before executing it.\n", + "Action: query_checker_sql_db\n", + "Action Input: SELECT Artist.Name, SUM(InvoiceLine.Quantity) AS TotalQuantity FROM Artist INNER JOIN Track ON Artist.ArtistId = Track.ArtistId INNER JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY TotalQuantity DESC LIMIT 3\u001b[0m\n", + "Observation: \u001b[31;1m\u001b[1;3m\n", + "\n", + "SELECT Artist.Name, SUM(InvoiceLine.Quantity) AS TotalQuantity \n", + "FROM Artist \n", + "INNER JOIN Track ON Artist.ArtistId = Track.ArtistId \n", + "INNER JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId \n", + "GROUP BY Artist.Name \n", + "ORDER BY TotalQuantity DESC \n", + "LIMIT 3;\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Action: query_sql_db\n", + "Action Input: SELECT Artist.Name, SUM(InvoiceLine.Quantity) AS TotalQuantity FROM Artist INNER JOIN Album ON Artist.ArtistId = Album.ArtistId INNER JOIN Track ON Album.AlbumId = Track.AlbumId INNER JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY TotalQuantity DESC LIMIT 3\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m[('Iron Maiden', 140), ('U2', 107), ('Metallica', 91)]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Final Answer: The top 3 best selling artists are Iron Maiden, U2, and Metallica.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The top 3 best selling artists are Iron Maiden, U2, and Metallica.'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"Who are the top 3 best selling artists?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/agents/agent_toolkits/vectorstore.ipynb b/docs/modules/agents/agent_toolkits/vectorstore.ipynb new file mode 100644 index 00000000..9a84d1c7 --- /dev/null +++ b/docs/modules/agents/agent_toolkits/vectorstore.ipynb @@ -0,0 +1,417 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "18ada398-dce6-4049-9b56-fc0ede63da9c", + "metadata": {}, + "source": [ + "# Vectorstore Agent\n", + "\n", + "This notebook showcases an agent designed to retrieve information from one or more vectorstores, either with or without sources." + ] + }, + { + "cell_type": "markdown", + "id": "eecb683b-3a46-4b9d-81a3-7caefbfec1a1", + "metadata": {}, + "source": [ + "## Create the Vectorstores" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9bfd0ed8-a5eb-443e-8e92-90be8cabb0a7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain import OpenAI, VectorDBQA\n", + "llm = OpenAI(temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "345bb078-4ec1-4e3a-827b-cd238c49054d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Chroma using direct local API.\n", + "Using DuckDB in-memory for database. Data will be transient.\n" + ] + } + ], + "source": [ + "from langchain.document_loaders import TextLoader\n", + "loader = TextLoader('../../state_of_the_union.txt')\n", + "documents = loader.load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_documents(documents)\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "state_of_union_store = Chroma.from_documents(texts, embeddings, collection_name=\"state-of-union\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5f50eb82-e1a5-4252-8306-8ec1b478d9b4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Chroma using direct local API.\n", + "Using DuckDB in-memory for database. Data will be transient.\n" + ] + } + ], + "source": [ + "from langchain.document_loaders import WebBaseLoader\n", + "loader = WebBaseLoader(\"https://beta.ruff.rs/docs/faq/\")\n", + "docs = loader.load()\n", + "ruff_texts = text_splitter.split_documents(docs)\n", + "ruff_store = Chroma.from_documents(ruff_texts, embeddings, collection_name=\"ruff\")" + ] + }, + { + "cell_type": "markdown", + "id": "f4814175-964d-42f1-aa9d-22801ce1e912", + "metadata": {}, + "source": [ + "## Initalize Toolkit and Agent\n", + "\n", + "First, we'll create an agent with a single vectorstore." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5b3b3206", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents.agent_toolkits import (\n", + " create_vectorstore_agent,\n", + " VectorStoreToolkit,\n", + " VectorStoreInfo,\n", + ")\n", + "vectorstore_info = VectorStoreInfo(\n", + " name=\"state_of_union_address\",\n", + " description=\"the most recent state of the Union adress\",\n", + " vectorstore=state_of_union_store\n", + ")\n", + "toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info)\n", + "agent_executor = create_vectorstore_agent(\n", + " llm=llm,\n", + " toolkit=toolkit,\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8a38ad10", + "metadata": {}, + "source": [ + "## Examples" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3f2f455c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to find the answer in the state of the union address\n", + "Action: state_of_union_address\n", + "Action Input: What did biden say about ketanji brown jackson\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d61e1e63", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to use the state_of_union_address_with_sources tool to answer this question.\n", + "Action: state_of_union_address_with_sources\n", + "Action Input: What did biden say about ketanji brown jackson\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m{\"answer\": \" Biden said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to the United States Supreme Court, and that she is one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence.\\n\", \"sources\": \"../../state_of_the_union.txt\"}\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: Biden said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to the United States Supreme Court, and that she is one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence. Sources: ../../state_of_the_union.txt\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"Biden said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to the United States Supreme Court, and that she is one of the nation's top legal minds who will continue Justice Breyer's legacy of excellence. Sources: ../../state_of_the_union.txt\"" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address? List the source.\")" + ] + }, + { + "cell_type": "markdown", + "id": "7ca07707", + "metadata": {}, + "source": [ + "## Multiple Vectorstores\n", + "We can also easily use this initialize an agent with multiple vectorstores and use the agent to route between them. To do this. This agent is optimized for routing, so it is a different toolkit and initializer." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c3209fd3", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents.agent_toolkits import (\n", + " create_vectorstore_router_agent,\n", + " VectorStoreRouterToolkit,\n", + " VectorStoreInfo,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "815c4f39-308d-4949-b992-1361036e6e09", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ruff_vectorstore_info = VectorStoreInfo(\n", + " name=\"ruff\",\n", + " description=\"Information about the Ruff python linting library\",\n", + " vectorstore=ruff_store\n", + ")\n", + "router_toolkit = VectorStoreRouterToolkit(\n", + " vectorstores=[vectorstore_info, ruff_vectorstore_info],\n", + " llm=llm\n", + ")\n", + "agent_executor = create_vectorstore_agent(\n", + " llm=llm,\n", + " toolkit=router_toolkit,\n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "71680984-edaf-4a63-90f5-94edbd263550", + "metadata": {}, + "source": [ + "## Examples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3cd1bf3e-e3df-4e69-bbe1-71c64b1af947", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to use the state_of_union_address tool to answer this question.\n", + "Action: state_of_union_address\n", + "Action Input: What did biden say about ketanji brown jackson\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\"" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c5998b8d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to find out what tool ruff uses to run over Jupyter Notebooks\n", + "Action: ruff\n", + "Action Input: What tool does ruff use to run over Jupyter Notebooks?\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.ipynb\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.ipynb\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.ipynb'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What tool does ruff use to run over Jupyter Notebooks?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "744e9b51-fbd9-4778-b594-ea957d0f3467", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to find out what tool ruff uses and if the president mentioned it in the state of the union.\n", + "Action: ruff\n", + "Action Input: What tool does ruff use to run over Jupyter Notebooks?\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m Ruff is integrated into nbQA, a tool for running linters and code formatters over Jupyter Notebooks. After installing ruff and nbqa, you can run Ruff over a notebook like so: > nbqa ruff Untitled.ipynb\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to find out if the president mentioned nbQA in the state of the union.\n", + "Action: state_of_union_address\n", + "Action Input: Did the president mention nbQA in the state of the union?\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m No, the president did not mention nbQA in the state of the union.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Final Answer: No, the president did not mention nbQA in the state of the union.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'No, the president did not mention nbQA in the state of the union.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor.run(\"What tool does ruff use to run over Jupyter Notebooks? Did the president mention that tool in the state of the union?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92203aa9-f63a-4ce1-b562-fadf4474ad9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/agents/examples/async_agent.ipynb b/docs/modules/agents/examples/async_agent.ipynb index 4dbf1215..854f2c86 100644 --- a/docs/modules/agents/examples/async_agent.ipynb +++ b/docs/modules/agents/examples/async_agent.ipynb @@ -403,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/docs/modules/agents/how_to_guides.rst b/docs/modules/agents/how_to_guides.rst index a8ac0ffc..b6c7b35f 100644 --- a/docs/modules/agents/how_to_guides.rst +++ b/docs/modules/agents/how_to_guides.rst @@ -21,7 +21,7 @@ The first category of how-to guides here cover specific parts of working with ag `Asynchronous <./examples/async_agent.html>`_: Covering asynchronous functionality. -The next set of examples are all end-to-end agents for specific applications. +The next set of examples are all end-to-end example of different agent types. In all examples there is an Agent with a particular set of tools. - Tools: A tool can be anything that takes in a string and returns a string. This means that you can use both the primitives AND the chains found in `this <../chains.html>`_ documentation. LangChain also provides a list of easily loadable tools. For detailed information on those, please see `this documentation <./tools.html>`_ @@ -50,6 +50,19 @@ In all examples there is an Agent with a particular set of tools. - `Example Notebook <./implementations/react.html>`_ +The final set of examples covers agents with toolkits. +As opposed to the examples above, these examples are not intended to show off an agent `type`, +but rather to show off an agent applied to particular use case. + +`SQLDatabase Agent <./agent_toolkits/sql_database.html>`_: This notebook covers how to interact with an arbitrary SQL database using an agent. + +`JSON Agent <./agent_toolkits/json.html>`_: This notebook covers how to interact with a JSON dictionary using an agent. + +`OpenAPI Agent <./agent_toolkits/openapi.html>`_: This notebook covers how to interact with an arbitrary OpenAPI endpoint using an agent. + +`VectorStore Agent <./agent_toolkits/vectorstore.html>`_: This notebook covers how to interact with VectorStores using an agent. + +`Python Agent <./agent_toolkits/python.html>`_: This notebook covers how to produce and execute python code using an agent. .. toctree:: :maxdepth: 1 diff --git a/docs/modules/agents/implementations/mrkl.ipynb b/docs/modules/agents/implementations/mrkl.ipynb index 0afc3da0..f0ec3a37 100644 --- a/docs/modules/agents/implementations/mrkl.ipynb +++ b/docs/modules/agents/implementations/mrkl.ipynb @@ -205,7 +205,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/docs/modules/agents/implementations/self_ask_with_search.ipynb b/docs/modules/agents/implementations/self_ask_with_search.ipynb index 21d1391e..8a264239 100644 --- a/docs/modules/agents/implementations/self_ask_with_search.ipynb +++ b/docs/modules/agents/implementations/self_ask_with_search.ipynb @@ -63,7 +63,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.0 64-bit ('llm-env')", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -77,7 +77,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.10.9" }, "vscode": { "interpreter": { diff --git a/langchain/agents/__init__.py b/langchain/agents/__init__.py index 314814b7..8d2c026c 100644 --- a/langchain/agents/__init__.py +++ b/langchain/agents/__init__.py @@ -1,5 +1,12 @@ """Interface for agents.""" from langchain.agents.agent import Agent, AgentExecutor +from langchain.agents.agent_toolkits import ( + create_json_agent, + create_openapi_agent, + create_sql_agent, + create_vectorstore_agent, + create_vectorstore_router_agent, +) from langchain.agents.conversational.base import ConversationalAgent from langchain.agents.initialize import initialize_agent from langchain.agents.load_tools import get_all_tool_names, load_tools @@ -24,4 +31,9 @@ __all__ = [ "get_all_tool_names", "ConversationalAgent", "load_agent", + "create_sql_agent", + "create_json_agent", + "create_openapi_agent", + "create_vectorstore_router_agent", + "create_vectorstore_agent", ] diff --git a/langchain/agents/agent.py b/langchain/agents/agent.py index 6fd6d875..095e7365 100644 --- a/langchain/agents/agent.py +++ b/langchain/agents/agent.py @@ -47,7 +47,7 @@ class Agent(BaseModel): @property def _stop(self) -> List[str]: - return [f"\n{self.observation_prefix}"] + return [f"\n{self.observation_prefix}", f"\n\t{self.observation_prefix}"] def _construct_scratchpad( self, intermediate_steps: List[Tuple[AgentAction, str]] diff --git a/langchain/agents/agent_toolkits/__init__.py b/langchain/agents/agent_toolkits/__init__.py new file mode 100644 index 00000000..2d72ea0c --- /dev/null +++ b/langchain/agents/agent_toolkits/__init__.py @@ -0,0 +1,33 @@ +"""Agent toolkits.""" + +from langchain.agents.agent_toolkits.json.base import create_json_agent +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent +from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit +from langchain.agents.agent_toolkits.python.base import create_python_agent +from langchain.agents.agent_toolkits.sql.base import create_sql_agent +from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain.agents.agent_toolkits.vectorstore.base import ( + create_vectorstore_agent, + create_vectorstore_router_agent, +) +from langchain.agents.agent_toolkits.vectorstore.toolkit import ( + VectorStoreInfo, + VectorStoreRouterToolkit, + VectorStoreToolkit, +) + +__all__ = [ + "create_json_agent", + "create_sql_agent", + "create_openapi_agent", + "create_python_agent", + "create_vectorstore_agent", + "JsonToolkit", + "SQLDatabaseToolkit", + "OpenAPIToolkit", + "VectorStoreToolkit", + "create_vectorstore_router_agent", + "VectorStoreInfo", + "VectorStoreRouterToolkit", +] diff --git a/langchain/agents/agent_toolkits/base.py b/langchain/agents/agent_toolkits/base.py new file mode 100644 index 00000000..ce9b9e43 --- /dev/null +++ b/langchain/agents/agent_toolkits/base.py @@ -0,0 +1,15 @@ +"""Toolkits for agents.""" +from abc import abstractmethod +from typing import List + +from pydantic import BaseModel + +from langchain.tools import BaseTool + + +class BaseToolkit(BaseModel): + """Class responsible for defining a collection of related tools.""" + + @abstractmethod + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" diff --git a/langchain/agents/agent_toolkits/json/__init__.py b/langchain/agents/agent_toolkits/json/__init__.py new file mode 100644 index 00000000..bfab0ec6 --- /dev/null +++ b/langchain/agents/agent_toolkits/json/__init__.py @@ -0,0 +1 @@ +"""Json agent.""" diff --git a/langchain/agents/agent_toolkits/json/base.py b/langchain/agents/agent_toolkits/json/base.py new file mode 100644 index 00000000..48d3fd9c --- /dev/null +++ b/langchain/agents/agent_toolkits/json/base.py @@ -0,0 +1,43 @@ +"""Json agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_json_agent( + llm: BaseLLM, + toolkit: JsonToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = JSON_PREFIX, + suffix: str = JSON_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a json agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) diff --git a/langchain/agents/agent_toolkits/json/prompt.py b/langchain/agents/agent_toolkits/json/prompt.py new file mode 100644 index 00000000..a3b7584a --- /dev/null +++ b/langchain/agents/agent_toolkits/json/prompt.py @@ -0,0 +1,25 @@ +# flake8: noqa + +JSON_PREFIX = """You are an agent designed to interact with JSON. +Your goal is to return a final answer by interacting with the JSON. +You have access to the following tools which help you learn more about the JSON you are interacting with. +Only use the below tools. Only use the information returned by the below tools to construct your final answer. +Do not make up any information that is not contained in the JSON. +Your input to the tools should be in the form of `data["key"][0]` where `data` is the JSON blob you are interacting with, and the syntax used is Python. +You should only use keys that you know for a fact exist. You must validate that a key exists by seeing it previously when calling `json_spec_list_keys`. +If you have not seen a key in one of those responses, you cannot use it. +You should only add one key at a time to the path. You cannot add multiple keys at once. +If you encounter a "KeyError", go back to the previous key, look at the available keys, and try again. + +If the question does not seem to be related to the JSON, just return "I don't know" as the answer. +Always begin your interaction with the `json_spec_list_keys` tool with input "data" to see what keys exist in the JSON. + +Note that sometimes the value at a given path is large. In this case, you will get an error "Value is a large dictionary, should explore its keys directly". +In this case, you should ALWAYS follow up by using the `json_spec_list_keys` tool to see what keys exist at that path. +Do not simply refer the user to the JSON or a section of the JSON, as this is not a valid answer. Keep digging until you find the answer and explicitly return it. +""" +JSON_SUFFIX = """Begin!" + +Question: {input} +Thought: I should look at the keys that exist in data to see what I have access to +{agent_scratchpad}""" diff --git a/langchain/agents/agent_toolkits/json/toolkit.py b/langchain/agents/agent_toolkits/json/toolkit.py new file mode 100644 index 00000000..a12cf290 --- /dev/null +++ b/langchain/agents/agent_toolkits/json/toolkit.py @@ -0,0 +1,21 @@ +"""Toolkit for interacting with a JSON spec.""" +from __future__ import annotations + +from typing import List + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.tools import BaseTool +from langchain.tools.json.tool import JsonGetValueTool, JsonListKeysTool, JsonSpec + + +class JsonToolkit(BaseToolkit): + """Toolkit for interacting with a JSON spec.""" + + spec: JsonSpec + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + JsonListKeysTool(spec=self.spec), + JsonGetValueTool(spec=self.spec), + ] diff --git a/langchain/agents/agent_toolkits/openapi/__init__.py b/langchain/agents/agent_toolkits/openapi/__init__.py new file mode 100644 index 00000000..5d06e271 --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/__init__.py @@ -0,0 +1 @@ +"""OpenAPI spec agent.""" diff --git a/langchain/agents/agent_toolkits/openapi/base.py b/langchain/agents/agent_toolkits/openapi/base.py new file mode 100644 index 00000000..602ee87f --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/base.py @@ -0,0 +1,46 @@ +"""OpenAPI spec agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.openapi.prompt import ( + OPENAPI_PREFIX, + OPENAPI_SUFFIX, +) +from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_openapi_agent( + llm: BaseLLM, + toolkit: OpenAPIToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = OPENAPI_PREFIX, + suffix: str = OPENAPI_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a json agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) diff --git a/langchain/agents/agent_toolkits/openapi/prompt.py b/langchain/agents/agent_toolkits/openapi/prompt.py new file mode 100644 index 00000000..376c3d8c --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/prompt.py @@ -0,0 +1,29 @@ +# flake8: noqa + +OPENAPI_PREFIX = """You are an agent designed to answer questions by making web requests to an API given the openapi spec. + +If the question does not seem related to the API, return I don't know. Do not make up an answer. +Only use information provided by the tools to construct your response. + +First, find the base URL needed to make the request. + +Second, find the relevant paths needed to answer the question. Take note that, sometimes, you might need to make more than one request to more than one path to answer the question. + +Third, find the required parameters needed to make the request. For GET requests, these are usually URL parameters and for POST requests, these are request body parameters. + +Fourth, make the requests needed to answer the question. Ensure that you are sending the correct parameters to the request by checking which parameters are required. For parameters with a fixed set of values, please use the spec to look at which values are allowed. + +Use the exact parameter names as listed in the spec, do not make up any names or abbreviate the names of parameters. +If you get a not found error, ensure that you are using a path that actually exists in the spec. +""" +OPENAPI_SUFFIX = """Begin!" + +Question: {input} +Thought: I should explore the spec to find the base url for the API. +{agent_scratchpad}""" + +DESCRIPTION = """Can be used to answer questions about the openapi spec for the API. Always use this tool before trying to make a request. +Example inputs to this tool: + 'What are the required query parameters for a GET request to the /bar endpoint?` + 'What are the required parameters in the request body for a POST request to the /foo endpoint?' +Always give this tool a specific question.""" diff --git a/langchain/agents/agent_toolkits/openapi/toolkit.py b/langchain/agents/agent_toolkits/openapi/toolkit.py new file mode 100644 index 00000000..25d48634 --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/toolkit.py @@ -0,0 +1,67 @@ +"""Requests toolkit.""" +from __future__ import annotations + +from typing import Any, List + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.agents.agent_toolkits.json.base import create_json_agent +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.agent_toolkits.openapi.prompt import DESCRIPTION +from langchain.agents.tools import Tool +from langchain.llms.base import BaseLLM +from langchain.requests import RequestsWrapper +from langchain.tools import BaseTool +from langchain.tools.json.tool import JsonSpec +from langchain.tools.requests.tool import ( + RequestsDeleteTool, + RequestsGetTool, + RequestsPatchTool, + RequestsPostTool, + RequestsPutTool, +) + + +class RequestsToolkit(BaseToolkit): + """Toolkit for making requests.""" + + requests_wrapper: RequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Return a list of tools.""" + return [ + RequestsGetTool(requests_wrapper=self.requests_wrapper), + RequestsPostTool(requests_wrapper=self.requests_wrapper), + RequestsPatchTool(requests_wrapper=self.requests_wrapper), + RequestsPutTool(requests_wrapper=self.requests_wrapper), + RequestsDeleteTool(requests_wrapper=self.requests_wrapper), + ] + + +class OpenAPIToolkit(BaseToolkit): + """Toolkit for interacting with a OpenAPI api.""" + + json_agent: AgentExecutor + requests_wrapper: RequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + json_agent_tool = Tool( + name="json_explorer", + func=self.json_agent.run, + description=DESCRIPTION, + ) + request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper) + return [*request_toolkit.get_tools(), json_agent_tool] + + @classmethod + def from_llm( + cls, + llm: BaseLLM, + json_spec: JsonSpec, + requests_wrapper: RequestsWrapper, + **kwargs: Any, + ) -> OpenAPIToolkit: + """Create json agent from llm, then initialize.""" + json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs) + return cls(json_agent=json_agent, requests_wrapper=requests_wrapper) diff --git a/langchain/agents/agent_toolkits/python/__init__.py b/langchain/agents/agent_toolkits/python/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/langchain/agents/agent_toolkits/python/base.py b/langchain/agents/agent_toolkits/python/base.py new file mode 100644 index 00000000..ab643c60 --- /dev/null +++ b/langchain/agents/agent_toolkits/python/base.py @@ -0,0 +1,31 @@ +"""Python agent.""" + +from typing import Any, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.python.prompt import PREFIX +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM +from langchain.tools.python.tool import PythonREPLTool + + +def create_python_agent( + llm: BaseLLM, + tool: PythonREPLTool, + callback_manager: Optional[BaseCallbackManager] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a python agent from an LLM and tool.""" + tools = [tool] + prompt = ZeroShotAgent.create_prompt(tools, prefix=PREFIX) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) diff --git a/langchain/agents/agent_toolkits/python/prompt.py b/langchain/agents/agent_toolkits/python/prompt.py new file mode 100644 index 00000000..fc97e791 --- /dev/null +++ b/langchain/agents/agent_toolkits/python/prompt.py @@ -0,0 +1,9 @@ +# flake8: noqa + +PREFIX = """You are an agent designed to write and execute python code to answer questions. +You have access to a python REPL, which you can use to execute python code. +If you get an error, debug your code and try again. +Only use the output of your code to answer the question. +You might know the answer without running any code, but you should still run the code to get the answer. +If it does not seem like you can write code to answer the question, just return "I don't know" as the answer. +""" diff --git a/langchain/agents/agent_toolkits/sql/__init__.py b/langchain/agents/agent_toolkits/sql/__init__.py new file mode 100644 index 00000000..74293a52 --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/__init__.py @@ -0,0 +1 @@ +"""SQL agent.""" diff --git a/langchain/agents/agent_toolkits/sql/base.py b/langchain/agents/agent_toolkits/sql/base.py new file mode 100644 index 00000000..bf358cab --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/base.py @@ -0,0 +1,45 @@ +"""SQL agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX +from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_sql_agent( + llm: BaseLLM, + toolkit: SQLDatabaseToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = SQL_PREFIX, + suffix: str = SQL_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a sql agent from an LLM and tools.""" + tools = toolkit.get_tools() + prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k) + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) diff --git a/langchain/agents/agent_toolkits/sql/prompt.py b/langchain/agents/agent_toolkits/sql/prompt.py new file mode 100644 index 00000000..2d5f7e24 --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/prompt.py @@ -0,0 +1,21 @@ +# flake8: noqa + +SQL_PREFIX = """You are an agent designed to interact with a SQL database. +Given an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. +Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results using the LIMIT clause. +You can order the results by a relevant column to return the most interesting examples in the database. +Never query for all the columns from a specific table, only ask for a the few relevant columns given the question. +You have access to tools for interacting with the database. +Only use the below tools. Only use the information returned by the below tools to construct your final answer. +You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again. + +DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. + +If the question does not seem related to the database, just return "I don't know" as the answer. +""" + +SQL_SUFFIX = """Begin! + +Question: {input} +Thought: I should look at the tables in the database to see what I can query. +{agent_scratchpad}""" diff --git a/langchain/agents/agent_toolkits/sql/toolkit.py b/langchain/agents/agent_toolkits/sql/toolkit.py new file mode 100644 index 00000000..2b662d6b --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/toolkit.py @@ -0,0 +1,39 @@ +"""Toolkit for interacting with a SQL database.""" +from typing import List + +from pydantic import Field + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.sql_database import SQLDatabase +from langchain.tools import BaseTool +from langchain.tools.sql_database.tool import ( + InfoSQLDatabaseTool, + ListSQLDatabaseTool, + QueryCheckerTool, + QuerySQLDataBaseTool, +) + + +class SQLDatabaseToolkit(BaseToolkit): + """Toolkit for interacting with SQL databases.""" + + db: SQLDatabase = Field(exclude=True) + + @property + def dialect(self) -> str: + """Return string representation of dialect to use.""" + return self.db.dialect + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + QuerySQLDataBaseTool(db=self.db), + InfoSQLDatabaseTool(db=self.db), + ListSQLDatabaseTool(db=self.db), + QueryCheckerTool(db=self.db), + ] diff --git a/langchain/agents/agent_toolkits/vectorstore/__init__.py b/langchain/agents/agent_toolkits/vectorstore/__init__.py new file mode 100644 index 00000000..ee15a97e --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/__init__.py @@ -0,0 +1 @@ +"""Agent toolkit for interacting with vector stores.""" diff --git a/langchain/agents/agent_toolkits/vectorstore/base.py b/langchain/agents/agent_toolkits/vectorstore/base.py new file mode 100644 index 00000000..3b31fab8 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/base.py @@ -0,0 +1,55 @@ +"""VectorStore agent.""" +from typing import Any, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.vectorstore.prompt import PREFIX, ROUTER_PREFIX +from langchain.agents.agent_toolkits.vectorstore.toolkit import ( + VectorStoreRouterToolkit, + VectorStoreToolkit, +) +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_vectorstore_agent( + llm: BaseLLM, + toolkit: VectorStoreToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a vectorstore agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) + + +def create_vectorstore_router_agent( + llm: BaseLLM, + toolkit: VectorStoreRouterToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = ROUTER_PREFIX, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a vectorstore router agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) diff --git a/langchain/agents/agent_toolkits/vectorstore/prompt.py b/langchain/agents/agent_toolkits/vectorstore/prompt.py new file mode 100644 index 00000000..a2837e56 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/prompt.py @@ -0,0 +1,13 @@ +# flake8: noqa + +PREFIX = """You are an agent designed to answer questions about sets of documents. +You have access to tools for interacting with the documents, and the inputs to the tools are questions. +Sometimes, you will be asked to provide sources for your questions, in which case you should use the appropriate tool to do so. +If the question does not seem relevant to any of the tools provided, just return "I don't know" as the answer. +""" + +ROUTER_PREFIX = """You are an agent designed to answer questions. +You have access to tools for interacting with different sources, and the inputs to the tools are questions. +Your main task is to decide which of the tools is relevant for answering question at hand. +For complex questions, you can break the question down into sub questions and use tools to answers the sub questions. +""" diff --git a/langchain/agents/agent_toolkits/vectorstore/toolkit.py b/langchain/agents/agent_toolkits/vectorstore/toolkit.py new file mode 100644 index 00000000..72ad1262 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/toolkit.py @@ -0,0 +1,89 @@ +"""Toolkit for interacting with a vector store.""" +from typing import List + +from pydantic import BaseModel, Field + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI +from langchain.tools import BaseTool +from langchain.tools.vectorstore.tool import ( + VectorStoreQATool, + VectorStoreQAWithSourcesTool, +) +from langchain.vectorstores.base import VectorStore + + +class VectorStoreInfo(BaseModel): + """Information about a vectorstore.""" + + vectorstore: VectorStore = Field(exclude=True) + name: str + description: str + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + +class VectorStoreToolkit(BaseToolkit): + """Toolkit for interacting with a vector store.""" + + vectorstore_info: VectorStoreInfo = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + description = VectorStoreQATool.get_description( + self.vectorstore_info.name, self.vectorstore_info.description + ) + qa_tool = VectorStoreQATool( + name=self.vectorstore_info.name, + description=description, + vectorstore=self.vectorstore_info.vectorstore, + llm=self.llm, + ) + description = VectorStoreQAWithSourcesTool.get_description( + self.vectorstore_info.name, self.vectorstore_info.description + ) + qa_with_sources_tool = VectorStoreQAWithSourcesTool( + name=f"{self.vectorstore_info.name}_with_sources", + description=description, + vectorstore=self.vectorstore_info.vectorstore, + llm=self.llm, + ) + return [qa_tool, qa_with_sources_tool] + + +class VectorStoreRouterToolkit(BaseToolkit): + """Toolkit for routing between vectorstores.""" + + vectorstores: List[VectorStoreInfo] = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + tools: List[BaseTool] = [] + for vectorstore_info in self.vectorstores: + description = VectorStoreQATool.get_description( + vectorstore_info.name, vectorstore_info.description + ) + qa_tool = VectorStoreQATool( + name=vectorstore_info.name, + description=description, + vectorstore=vectorstore_info.vectorstore, + llm=self.llm, + ) + tools.append(qa_tool) + return tools diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index b71f7f02..5633680d 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -9,11 +9,12 @@ from langchain.chains.api.base import APIChain from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.pal.base import PALChain from langchain.llms.base import BaseLLM -from langchain.python import PythonREPL +from langchain.tools.python.tool import PythonREPLTool from langchain.requests import RequestsWrapper from langchain.tools.base import BaseTool from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun +from langchain.tools.requests.tool import RequestsGetTool from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun from langchain.utilities.bash import BashProcess from langchain.utilities.bing_search import BingSearchAPIWrapper @@ -25,19 +26,11 @@ from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper def _get_python_repl() -> BaseTool: - return Tool( - name="Python REPL", - description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you expect output it should be printed out.", - func=PythonREPL().run, - ) + return PythonREPLTool() def _get_requests() -> BaseTool: - return Tool( - name="Requests", - description="A portal to the internet. Use this when you need to get specific content from a site. Input should be a specific url, and the output will be all the text on that page.", - func=RequestsWrapper().run, - ) + return RequestsGetTool(requests_wrapper=RequestsWrapper()) def _get_terminal() -> BaseTool: diff --git a/langchain/chains/api/base.py b/langchain/chains/api/base.py index 0dcbe853..5036a72c 100644 --- a/langchain/chains/api/base.py +++ b/langchain/chains/api/base.py @@ -69,7 +69,7 @@ class APIChain(Chain, BaseModel): self.callback_manager.on_text( api_url, color="green", end="\n", verbose=self.verbose ) - api_response = self.requests_wrapper.run(api_url) + api_response = self.requests_wrapper.get(api_url) self.callback_manager.on_text( api_response, color="yellow", end="\n", verbose=self.verbose ) diff --git a/langchain/chains/llm_requests.py b/langchain/chains/llm_requests.py index f7b0dc4e..2374a42e 100644 --- a/langchain/chains/llm_requests.py +++ b/langchain/chains/llm_requests.py @@ -67,7 +67,7 @@ class LLMRequestsChain(Chain, BaseModel): # Other keys are assumed to be needed for LLM prediction other_keys = {k: v for k, v in inputs.items() if k != self.input_key} url = inputs[self.input_key] - res = self.requests_wrapper.run(url) + res = self.requests_wrapper.get(url) # extract the text from the html soup = BeautifulSoup(res, "html.parser") other_keys[self.requests_key] = soup.get_text()[: self.text_length] diff --git a/langchain/python.py b/langchain/python.py index f31decbe..a14bc2e1 100644 --- a/langchain/python.py +++ b/langchain/python.py @@ -3,21 +3,21 @@ import sys from io import StringIO from typing import Dict, Optional +from pydantic import BaseModel, Field -class PythonREPL: + +class PythonREPL(BaseModel): """Simulates a standalone Python REPL.""" - def __init__(self, _globals: Optional[Dict] = None, _locals: Optional[Dict] = None): - """Initialize with optional globals and locals.""" - self._globals = _globals if _globals is not None else {} - self._locals = _locals if _locals is not None else {} + globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") + locals: Optional[Dict] = Field(default_factory=dict, alias="_locals") def run(self, command: str) -> str: """Run command with own globals/locals and returns anything printed.""" old_stdout = sys.stdout sys.stdout = mystdout = StringIO() try: - exec(command, self._globals, self._locals) + exec(command, self.globals, self.locals) sys.stdout = old_stdout output = mystdout.getvalue() except Exception as e: diff --git a/langchain/requests.py b/langchain/requests.py index 70bf5913..60efc634 100644 --- a/langchain/requests.py +++ b/langchain/requests.py @@ -1,15 +1,73 @@ -"""Lightweight wrapper around request library.""" -from typing import Optional +"""Lightweight wrapper around requests library, with async support.""" +from typing import Any, Dict, Optional +import aiohttp import requests -from pydantic import BaseModel +from pydantic import BaseModel, Extra class RequestsWrapper(BaseModel): - """Lightweight wrapper to partial out everything except the url to hit.""" + """Lightweight wrapper around requests library.""" - headers: Optional[dict] = None + headers: Optional[Dict[str, str]] = None + aiosession: Optional[aiohttp.ClientSession] = None - def run(self, url: str) -> str: - """Hit the URL and return the text.""" + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def get(self, url: str) -> str: + """GET the URL and return the text.""" return requests.get(url, headers=self.headers).text + + def post(self, url: str, data: Dict[str, Any]) -> str: + """POST to the URL and return the text.""" + return requests.post(url, json=data, headers=self.headers).text + + def patch(self, url: str, data: Dict[str, Any]) -> str: + """PATCH the URL and return the text.""" + return requests.patch(url, json=data, headers=self.headers).text + + def put(self, url: str, data: Dict[str, Any]) -> str: + """PUT the URL and return the text.""" + return requests.put(url, json=data, headers=self.headers).text + + def delete(self, url: str) -> str: + """DELETE the URL and return the text.""" + return requests.delete(url, headers=self.headers).text + + async def _arequest(self, method: str, url: str, **kwargs: Any) -> str: + """Make an async request.""" + if not self.aiosession: + async with aiohttp.ClientSession() as session: + async with session.request( + method, url, headers=self.headers, **kwargs + ) as response: + return await response.text() + else: + async with self.aiosession.request( + method, url, headers=self.headers, **kwargs + ) as response: + return await response.text() + + async def aget(self, url: str) -> str: + """GET the URL and return the text asynchronously.""" + return await self._arequest("GET", url) + + async def apost(self, url: str, data: Dict[str, Any]) -> str: + """POST to the URL and return the text asynchronously.""" + return await self._arequest("POST", url, json=data) + + async def apatch(self, url: str, data: Dict[str, Any]) -> str: + """PATCH the URL and return the text asynchronously.""" + return await self._arequest("PATCH", url, json=data) + + async def aput(self, url: str, data: Dict[str, Any]) -> str: + """PUT the URL and return the text asynchronously.""" + return await self._arequest("PUT", url, json=data) + + async def adelete(self, url: str) -> str: + """DELETE the URL and return the text asynchronously.""" + return await self._arequest("DELETE", url) diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 190c5072..832a4184 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -5,7 +5,7 @@ from typing import Any, Iterable, List, Optional from sqlalchemy import MetaData, create_engine, inspect, select from sqlalchemy.engine import Engine -from sqlalchemy.exc import ProgrammingError +from sqlalchemy.exc import ProgrammingError, SQLAlchemyError from sqlalchemy.schema import CreateTable @@ -187,3 +187,33 @@ class SQLDatabase: raise ValueError("Fetch parameter must be either 'one' or 'all'") return str(result) return "" + + def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str: + """Get information about specified tables. + + Follows best practices as specified in: Rajkumar et al, 2022 + (https://arxiv.org/abs/2204.00498) + + If `sample_rows_in_table_info`, the specified number of sample rows will be + appended to each table description. This can increase performance as + demonstrated in the paper. + """ + try: + return self.get_table_info(table_names) + except ValueError as e: + """Format the error message""" + return f"Error: {e}" + + def run_no_throw(self, command: str, fetch: str = "all") -> str: + """Execute a SQL command and return a string representing the results. + + If the statement returns rows, a string of the results is returned. + If the statement returns no rows, an empty string is returned. + + If the statement throws an error, the error message is returned. + """ + try: + return self.run(command, fetch) + except SQLAlchemyError as e: + """Format the error message""" + return f"Error: {e}" diff --git a/langchain/tools/base.py b/langchain/tools/base.py index b3853818..9a40cde1 100644 --- a/langchain/tools/base.py +++ b/langchain/tools/base.py @@ -1,7 +1,7 @@ """Base implementation for tools or skills.""" from abc import abstractmethod -from typing import Any, List, Optional +from typing import Any, Optional from pydantic import BaseModel, Extra, Field, validator @@ -119,11 +119,3 @@ class BaseTool(BaseModel): observation, verbose=verbose, color=color, **kwargs ) return observation - - -class BaseToolkit(BaseModel): - """Class responsible for defining a collection of related tools.""" - - @abstractmethod - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" diff --git a/langchain/tools/bing_search/toolkit.py b/langchain/tools/bing_search/toolkit.py deleted file mode 100644 index 1540698e..00000000 --- a/langchain/tools/bing_search/toolkit.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Tool for the Bing search API.""" - -from typing import List - -from langchain.tools.base import BaseTool, BaseToolkit -from langchain.tools.bing_search.tool import BingSearchRun -from langchain.utilities.bing_search import BingSearchAPIWrapper - - -class BingSearchToolkit(BaseToolkit): - """Tool that adds the capability to query the Bing search API.""" - - bing_subscription_key: str - bing_search_url: str - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - wrapper = BingSearchAPIWrapper( - bing_subscription_key=self.bing_subscription_key, - bing_search_url=self.bing_search_url, - ) - return [ - BingSearchRun( - api_wrapper=wrapper, - ) - ] diff --git a/langchain/tools/google_search/toolkit.py b/langchain/tools/google_search/toolkit.py deleted file mode 100644 index 18e87e0a..00000000 --- a/langchain/tools/google_search/toolkit.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Tool for the Google search API.""" - -from typing import List - -from langchain.tools.base import BaseTool, BaseToolkit -from langchain.tools.google_search.tool import GoogleSearchRun -from langchain.utilities.google_search import GoogleSearchAPIWrapper - - -class GoogleSearchToolkit(BaseToolkit): - """Tool that adds the capability to query the Google search API.""" - - google_subscription_key: str - google_search_url: str - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - wrapper = GoogleSearchAPIWrapper( - google_api_key=self.google_subscription_key, - google_search_url=self.google_search_url, - ) - return [ - GoogleSearchRun( - api_wrapper=wrapper, - ) - ] diff --git a/langchain/tools/json/__init__.py b/langchain/tools/json/__init__.py new file mode 100644 index 00000000..d13302f0 --- /dev/null +++ b/langchain/tools/json/__init__.py @@ -0,0 +1 @@ +"""Tools for interacting with a JSON file.""" diff --git a/langchain/tools/json/tool.py b/langchain/tools/json/tool.py new file mode 100644 index 00000000..9f1bdac7 --- /dev/null +++ b/langchain/tools/json/tool.py @@ -0,0 +1,113 @@ +# flake8: noqa +"""Tools for working with JSON specs.""" +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Dict, List, Union + +from pydantic import BaseModel + +from langchain.tools.base import BaseTool + + +def _parse_input(text: str) -> List[Union[str, int]]: + """Parse input of the form data["key1"][0]["key2"] into a list of keys.""" + _res = re.findall(r"\[.*?]", text) + # strip the brackets and quotes, convert to int if possible + res = [i[1:-1].replace('"', "") for i in _res] + res = [int(i) if i.isdigit() else i for i in res] + return res + + +class JsonSpec(BaseModel): + """Base class for JSON spec.""" + + dict_: Dict + max_value_length: int = 200 + + @classmethod + def from_file(cls, path: Path) -> JsonSpec: + """Create a JsonSpec from a file.""" + if not path.exists(): + raise FileNotFoundError(f"File not found: {path}") + dict_ = json.loads(path.read_text()) + return cls(dict_=dict_) + + def keys(self, text: str) -> str: + """Return the keys of the dict at the given path. + + Args: + text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). + """ + try: + items = _parse_input(text) + val = self.dict_ + for i in items: + if i: + val = val[i] + if not isinstance(val, dict): + raise ValueError( + f"Value at path `{text}` is not a dict, get the value directly." + ) + return str(list(val.keys())) + except Exception as e: + return repr(e) + + def value(self, text: str) -> str: + """Return the value of the dict at the given path. + + Args: + text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). + """ + try: + items = _parse_input(text) + val = self.dict_ + for i in items: + val = val[i] + + if isinstance(val, dict) and len(str(val)) > self.max_value_length: + return "Value is a large dictionary, should explore its keys directly" + str_val = str(val) + if len(str_val) > self.max_value_length: + str_val = str_val[: self.max_value_length] + "..." + return str_val + except Exception as e: + return repr(e) + + +class JsonListKeysTool(BaseTool): + """Tool for listing keys in a JSON spec.""" + + name = "json_spec_list_keys" + description = """ + Can be used to list all keys at a given path. + Before calling this you should be SURE that the path to this exists. + The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). + """ + spec: JsonSpec + + def _run(self, tool_input: str) -> str: + return self.spec.keys(tool_input) + + async def _arun(self, tool_input: str) -> str: + return self._run(tool_input) + + +class JsonGetValueTool(BaseTool): + """Tool for getting a value in a JSON spec.""" + + name = "json_spec_get_value" + description = """ + Can be used to see value in string format at a given path. + Before calling this you should be SURE that the path to this exists. + The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). + """ + spec: JsonSpec + + def _run(self, tool_input: str) -> str: + return self.spec.value(tool_input) + + async def _arun(self, tool_input: str) -> str: + return self._run(tool_input) diff --git a/langchain/tools/python/__init__.py b/langchain/tools/python/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/langchain/tools/python/tool.py b/langchain/tools/python/tool.py new file mode 100644 index 00000000..fa16ff86 --- /dev/null +++ b/langchain/tools/python/tool.py @@ -0,0 +1,30 @@ +"""A tool for running python code in a REPL.""" + +from pydantic import Field + +from langchain.python import PythonREPL +from langchain.tools.base import BaseTool + + +def _get_default_python_repl() -> PythonREPL: + return PythonREPL(_globals=globals(), _locals=None) + + +class PythonREPLTool(BaseTool): + """A tool for running python code in a REPL.""" + + name = "Python REPL" + description = ( + "A Python shell. Use this to execute python commands. " + "Input should be a valid python command. " + "If you expect output it should be printed out." + ) + python_repl: PythonREPL = Field(default_factory=_get_default_python_repl) + + def _run(self, query: str) -> str: + """Use the tool.""" + return self.python_repl.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("PythonReplTool does not support async") diff --git a/langchain/tools/requests/__init__.py b/langchain/tools/requests/__init__.py new file mode 100644 index 00000000..ec421f18 --- /dev/null +++ b/langchain/tools/requests/__init__.py @@ -0,0 +1 @@ +"""Tools for making requests to an API endpoint.""" diff --git a/langchain/tools/requests/tool.py b/langchain/tools/requests/tool.py new file mode 100644 index 00000000..7802f598 --- /dev/null +++ b/langchain/tools/requests/tool.py @@ -0,0 +1,137 @@ +# flake8: noqa +"""Tools for making requests to an API endpoint.""" +import json +from typing import Any, Dict + +from pydantic import BaseModel + +from langchain.requests import RequestsWrapper +from langchain.tools.base import BaseTool + + +def _parse_input(text: str) -> Dict[str, Any]: + """Parse the json string into a dict.""" + return json.loads(text) + + +class BaseRequestsTool(BaseModel): + """Base class for requests tools.""" + + requests_wrapper: RequestsWrapper + + +class RequestsGetTool(BaseRequestsTool, BaseTool): + """Tool for making a GET request to an API endpoint.""" + + name = "requests_get" + description = "A portal to the internet. Use this when you need to get specific content from a website. Input should be a url (i.e. https://www.google.com). The output will be the text response of the GET request." + + def _run(self, url: str) -> str: + """Run the tool.""" + return self.requests_wrapper.get(url) + + async def _arun(self, url: str) -> str: + """Run the tool asynchronously.""" + return await self.requests_wrapper.aget(url) + + +class RequestsPostTool(BaseRequestsTool, BaseTool): + """Tool for making a POST request to an API endpoint.""" + + name = "requests_post" + description = """Use this when you want to POST to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to POST to the url. + Be careful to always use double quotes for strings in the json string + The output will be the text response of the POST request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.post(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.apost(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsPatchTool(BaseRequestsTool, BaseTool): + """Tool for making a PATCH request to an API endpoint.""" + + name = "requests_patch" + description = """Use this when you want to PATCH to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to PATCH to the url. + Be careful to always use double quotes for strings in the json string + The output will be the text response of the PATCH request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.patch(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.apatch(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsPutTool(BaseRequestsTool, BaseTool): + """Tool for making a PUT request to an API endpoint.""" + + name = "requests_put" + description = """Use this when you want to PUT to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to PUT to the url. + Be careful to always use double quotes for strings in the json string. + The output will be the text response of the PUT request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.put(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.aput(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsDeleteTool(BaseRequestsTool, BaseTool): + """Tool for making a DELETE request to an API endpoint.""" + + name = "requests_delete" + description = "A portal to the internet. Use this when you need to make a DELETE request to a URL. Input should be a specific url, and the output will be the text response of the DELETE request." + + def _run(self, url: str) -> str: + """Run the tool.""" + return self.requests_wrapper.delete(url) + + async def _arun(self, url: str) -> str: + """Run the tool asynchronously.""" + return await self.requests_wrapper.adelete(url) diff --git a/langchain/tools/sql_database/__init__.py b/langchain/tools/sql_database/__init__.py new file mode 100644 index 00000000..90fb3be1 --- /dev/null +++ b/langchain/tools/sql_database/__init__.py @@ -0,0 +1 @@ +"""Tools for interacting with a SQL database.""" diff --git a/langchain/tools/sql_database/prompt.py b/langchain/tools/sql_database/prompt.py new file mode 100644 index 00000000..8d2b0973 --- /dev/null +++ b/langchain/tools/sql_database/prompt.py @@ -0,0 +1,14 @@ +# flake8: noqa +QUERY_CHECKER = """ +{query} +Double check the {dialect} query above for common mistakes, including: +- Using NOT IN with NULL values +- Using UNION when UNION ALL should have been used +- Using BETWEEN for exclusive ranges +- Data type mismatch in predicates +- Properly quoting identifiers +- Using the correct number of arguments for functions +- Casting to the correct data type +- Using the proper columns for joins + +If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.""" diff --git a/langchain/tools/sql_database/tool.py b/langchain/tools/sql_database/tool.py new file mode 100644 index 00000000..2c555d17 --- /dev/null +++ b/langchain/tools/sql_database/tool.py @@ -0,0 +1,111 @@ +# flake8: noqa +"""Tools for interacting with a SQL database.""" +from pydantic import BaseModel, Extra, Field, validator + +from langchain.chains.llm import LLMChain +from langchain.llms.openai import OpenAI +from langchain.prompts import PromptTemplate +from langchain.sql_database import SQLDatabase +from langchain.tools.base import BaseTool +from langchain.tools.sql_database.prompt import QUERY_CHECKER + + +class BaseSQLDatabaseTool(BaseModel): + """Base tool for interacting with a SQL database.""" + + db: SQLDatabase = Field(exclude=True) + + # Override BaseTool.Config to appease mypy + # See https://github.com/pydantic/pydantic/issues/4173 + class Config(BaseTool.Config): + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + extra = Extra.forbid + + +class QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for querying a SQL database.""" + + name = "query_sql_db" + description = """ + Input to this tool is a detailed and correct SQL query, output is a result from the database. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. + """ + + def _run(self, query: str) -> str: + """Execute the query, return the results or an error message.""" + return self.db.run_no_throw(query) + + async def _arun(self, query: str) -> str: + raise NotImplementedError("QuerySqlDbTool does not support async") + + +class InfoSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting metadata about a SQL database.""" + + name = "schema_sql_db" + description = """ + Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling list_tables_sql_db first! + + Example Input: "table1, table2, table3" + """ + + def _run(self, table_names: str) -> str: + """Get the schema for tables in a comma-separated list.""" + return self.db.get_table_info_no_throw(table_names.split(", ")) + + async def _arun(self, table_name: str) -> str: + raise NotImplementedError("SchemaSqlDbTool does not support async") + + +class ListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting tables names.""" + + name = "list_tables_sql_db" + description = "Input is an empty string, output is a comma separated list of tables in the database." + + def _run(self, tool_input: str = "") -> str: + """Get the schema for a specific table.""" + return ", ".join(self.db.get_table_names()) + + async def _arun(self, tool_input: str = "") -> str: + raise NotImplementedError("ListTablesSqlDbTool does not support async") + + +class QueryCheckerTool(BaseSQLDatabaseTool, BaseTool): + """Use an LLM to check if a query is correct. + Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" + + template: str = QUERY_CHECKER + llm_chain: LLMChain = Field( + default_factory=lambda: LLMChain( + llm=OpenAI(temperature=0), + prompt=PromptTemplate( + template=QUERY_CHECKER, input_variables=["query", "dialect"] + ), + ) + ) + name = "query_checker_sql_db" + description = """ + Use this tool to double check if your query is correct before executing it. + Always use this tool before executing a query with query_sql_db! + """ + + @validator("llm_chain") + def validate_llm_chain_input_variables(cls, llm_chain: LLMChain) -> LLMChain: + """Make sure the LLM chain has the correct input variables.""" + if llm_chain.prompt.input_variables != ["query", "dialect"]: + raise ValueError( + "LLM chain for QueryCheckerTool must have input variables ['query', 'dialect']" + ) + return llm_chain + + def _run(self, query: str) -> str: + """Use the LLM to check the query.""" + return self.llm_chain.predict(query=query, dialect=self.db.dialect) + + async def _arun(self, query: str) -> str: + return await self.llm_chain.apredict(query=query, dialect=self.db.dialect) diff --git a/langchain/tools/vectorstore/__init__.py b/langchain/tools/vectorstore/__init__.py new file mode 100644 index 00000000..2bb63810 --- /dev/null +++ b/langchain/tools/vectorstore/__init__.py @@ -0,0 +1 @@ +"""Simple tool wrapper around VectorDBQA chain.""" diff --git a/langchain/tools/vectorstore/tool.py b/langchain/tools/vectorstore/tool.py new file mode 100644 index 00000000..ee5a0695 --- /dev/null +++ b/langchain/tools/vectorstore/tool.py @@ -0,0 +1,81 @@ +"""Tools for interacting with vectorstores.""" + +import json +from typing import Any, Dict + +from pydantic import BaseModel, Field + +from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain +from langchain.chains.vector_db_qa.base import VectorDBQA +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI +from langchain.tools.base import BaseTool +from langchain.vectorstores.base import VectorStore + + +class BaseVectorStoreTool(BaseModel): + """Base class for tools that use a VectorStore.""" + + vectorstore: VectorStore = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config(BaseTool.Config): + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + +def _create_description_from_template(values: Dict[str, Any]) -> Dict[str, Any]: + values["description"] = values["template"].format(name=values["name"]) + return values + + +class VectorStoreQATool(BaseVectorStoreTool, BaseTool): + """Tool for the VectorDBQA chain. To be initialized with name and chain.""" + + @staticmethod + def get_description(name: str, description: str) -> str: + template: str = ( + "Useful for when you need to answer questions about {name}. " + "Whenever you need information about {description} " + "you should ALWAYS use this. " + "Input should be a fully formed question." + ) + return template.format(name=name, description=description) + + def _run(self, query: str) -> str: + """Use the tool.""" + chain = VectorDBQA.from_chain_type(self.llm, vectorstore=self.vectorstore) + return chain.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("VectorDBQATool does not support async") + + +class VectorStoreQAWithSourcesTool(BaseVectorStoreTool, BaseTool): + """Tool for the VectorDBQAWithSources chain.""" + + @staticmethod + def get_description(name: str, description: str) -> str: + template: str = ( + "Useful for when you need to answer questions about {name} and the sources " + "used to construct the answer. " + "Whenever you need information about {description} " + "you should ALWAYS use this. " + " Input should be a fully formed question. " + "Output is a json serialized dictionary with keys `answer` and `sources`. " + "Only use this tool if the user explicitly asks for sources." + ) + return template.format(name=name, description=description) + + def _run(self, query: str) -> str: + """Use the tool.""" + chain = VectorDBQAWithSourcesChain.from_chain_type( + self.llm, vectorstore=self.vectorstore + ) + return json.dumps(chain({chain.question_key: query}, return_only_outputs=True)) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("VectorDBQATool does not support async") diff --git a/langchain/tools/wolfram_alpha/toolkit.py b/langchain/tools/wolfram_alpha/toolkit.py deleted file mode 100644 index 0576b680..00000000 --- a/langchain/tools/wolfram_alpha/toolkit.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Toolkit for the Wolfram Alpha API.""" - -from typing import List - -from langchain.tools.base import BaseTool, BaseToolkit -from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun -from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper - - -class WolframAlphaToolkit(BaseToolkit): - """Tool that adds the capability to interact with Wolfram Alpha.""" - - wolfram_alpha_appid: str - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - wrapper = WolframAlphaAPIWrapper(wolfram_alpha_appid=self.wolfram_alpha_appid) - return [ - WolframAlphaQueryRun( - api_wrapper=wrapper, - ) - ] diff --git a/tests/unit_tests/chains/test_api.py b/tests/unit_tests/chains/test_api.py index 7a643632..8749d100 100644 --- a/tests/unit_tests/chains/test_api.py +++ b/tests/unit_tests/chains/test_api.py @@ -16,7 +16,7 @@ class FakeRequestsChain(RequestsWrapper): output: str - def run(self, url: str) -> str: + def get(self, url: str) -> str: """Just return the specified output.""" return self.output diff --git a/tests/unit_tests/test_python.py b/tests/unit_tests/test_python.py index 5d5461d3..d1eb3ae9 100644 --- a/tests/unit_tests/test_python.py +++ b/tests/unit_tests/test_python.py @@ -9,11 +9,13 @@ def test_python_repl() -> None: # Run a simple initial command. repl.run("foo = 1") - assert repl._locals["foo"] == 1 + assert repl.locals is not None + assert repl.locals["foo"] == 1 # Now run a command that accesses `foo` to make sure it still has it. repl.run("bar = foo * 2") - assert repl._locals["bar"] == 2 + assert repl.locals is not None + assert repl.locals["bar"] == 2 def test_python_repl_no_previous_variables() -> None: @@ -29,7 +31,8 @@ def test_python_repl_pass_in_locals() -> None: _locals = {"foo": 4} repl = PythonREPL(_locals=_locals) repl.run("bar = foo * 2") - assert repl._locals["bar"] == 8 + assert repl.locals is not None + assert repl.locals["bar"] == 8 def test_functionality() -> None: @@ -38,3 +41,15 @@ def test_functionality() -> None: code = "print(1 + 1)" output = chain.run(code) assert output == "2\n" + + +def test_function() -> None: + """Test correct functionality.""" + chain = PythonREPL() + code = "def add(a, b): " " return a + b" + output = chain.run(code) + assert output == "" + + code = "print(add(1, 2))" + output = chain.run(code) + assert output == "3\n" diff --git a/tests/unit_tests/tools/__init__.py b/tests/unit_tests/tools/__init__.py new file mode 100644 index 00000000..9c521669 --- /dev/null +++ b/tests/unit_tests/tools/__init__.py @@ -0,0 +1 @@ +"""Test suite for the tools module.""" diff --git a/tests/unit_tests/tools/test_json.py b/tests/unit_tests/tools/test_json.py new file mode 100644 index 00000000..36a96595 --- /dev/null +++ b/tests/unit_tests/tools/test_json.py @@ -0,0 +1,49 @@ +"""Test functionality of JSON tools.""" +from pathlib import Path + +from langchain.tools.json.tool import JsonSpec + + +def test_json_spec_from_file(tmp_path: Path) -> None: + """Test JsonSpec can be constructed from a file.""" + path = tmp_path / "test.json" + path.write_text('{"foo": "bar"}') + spec = JsonSpec.from_file(path) + assert spec.dict_ == {"foo": "bar"} + + +def test_json_spec_keys() -> None: + """Test JsonSpec can return keys of a dict at given path.""" + spec = JsonSpec(dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}) + assert spec.keys("data") == "['foo', 'baz']" + assert "ValueError" in spec.keys('data["foo"]') + assert spec.keys('data["baz"]') == "['test']" + assert spec.keys('data["baz"]["test"]') == "['foo']" + assert "ValueError" in spec.keys('data["baz"]["test"]["foo"]') + + +def test_json_spec_value() -> None: + """Test JsonSpec can return value of a dict at given path.""" + spec = JsonSpec(dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}) + assert spec.value("data") == "{'foo': 'bar', 'baz': {'test': {'foo': [1, 2, 3]}}}" + assert spec.value('data["foo"]') == "bar" + assert spec.value('data["baz"]') == "{'test': {'foo': [1, 2, 3]}}" + assert spec.value('data["baz"]["test"]') == "{'foo': [1, 2, 3]}" + assert spec.value('data["baz"]["test"]["foo"]') == "[1, 2, 3]" + + +def test_json_spec_value_max_length() -> None: + """Test JsonSpec can return value of a dict at given path.""" + spec = JsonSpec( + dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}, max_value_length=5 + ) + assert spec.value('data["foo"]') == "bar" + assert ( + spec.value('data["baz"]') + == "Value is a large dictionary, should explore its keys directly" + ) + assert ( + spec.value('data["baz"]["test"]') + == "Value is a large dictionary, should explore its keys directly" + ) + assert spec.value('data["baz"]["test"]["foo"]') == "[1, 2..."