mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
470 lines
14 KiB
Plaintext
470 lines
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0ed6aab1",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%% md\n"
|
|
}
|
|
},
|
|
"source": [
|
|
"# SQLite example\n",
|
|
"\n",
|
|
"This example showcases hooking up an LLM to answer questions over a database."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b2f66479",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%% md\n"
|
|
}
|
|
},
|
|
"source": [
|
|
"This uses the example Chinook database.\n",
|
|
"To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "d0e27d88",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain import OpenAI, SQLDatabase, SQLDatabaseChain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "72ede462",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
|
|
"llm = OpenAI(temperature=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "a8fc8f23",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "15ff81df",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are there? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Employee;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(9,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 9 employees.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 9 employees.'"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"How many employees are there?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "aad2cba6",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Customize Prompt\n",
|
|
"You can also customize the prompt that is used. Here is an example prompting it to understand that foobar is the same as the Employee table"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "8ca7bafb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.prompts.prompt import PromptTemplate\n",
|
|
"\n",
|
|
"_DEFAULT_TEMPLATE = \"\"\"Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.\n",
|
|
"Use the following format:\n",
|
|
"\n",
|
|
"Question: \"Question here\"\n",
|
|
"SQLQuery: \"SQL Query to run\"\n",
|
|
"SQLResult: \"Result of the SQLQuery\"\n",
|
|
"Answer: \"Final answer here\"\n",
|
|
"\n",
|
|
"Only use the following tables:\n",
|
|
"\n",
|
|
"{table_info}\n",
|
|
"\n",
|
|
"If someone asks for the table foobar, they really mean the employee table.\n",
|
|
"\n",
|
|
"Question: {input}\"\"\"\n",
|
|
"PROMPT = PromptTemplate(\n",
|
|
" input_variables=[\"input\", \"table_info\", \"dialect\"], template=_DEFAULT_TEMPLATE\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "ec47a2bf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, prompt=PROMPT, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "ebb0674e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are there in the foobar table? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Employee;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(9,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 9 employees in the foobar table.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 9 employees in the foobar table.'"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"How many employees are there in the foobar table?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "88d8b969",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Return Intermediate Steps\n",
|
|
"\n",
|
|
"You can also return the intermediate steps of the SQLDatabaseChain. This allows you to access the SQL statement that was generated, as well as the result of running that against the SQL Database."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "38559487",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, prompt=PROMPT, verbose=True, return_intermediate_steps=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "78b6af4d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are there in the foobar table? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Employee;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(9,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 9 employees in the foobar table.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[' SELECT COUNT(*) FROM Employee;', '[(9,)]']"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"result = db_chain(\"How many employees are there in the foobar table?\")\n",
|
|
"result[\"intermediate_steps\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b408f800",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Choosing how to limit the number of rows returned\n",
|
|
"If you are querying for several rows of a table you can select the maximum number of results you want to get by using the 'top_k' parameter (default is 10). This is useful for avoiding query results that exceed the prompt max length or consume tokens unnecessarily."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "6adaa799",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True, top_k=3)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "edfc8a8e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"What are some example tracks by composer Johann Sebastian Bach? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Name FROM Track WHERE Composer = 'Johann Sebastian Bach' LIMIT 3;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[('Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace',), ('Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria',), ('Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude',)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m Examples of tracks by Johann Sebastian Bach include 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', and 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude'.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' Examples of tracks by Johann Sebastian Bach include \\'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace\\', \\'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria\\', and \\'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude\\'.'"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"What are some example tracks by composer Johann Sebastian Bach?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bcc5e936",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Adding first row of each table\n",
|
|
"Sometimes, the format of the data is not obvious and it is optimal to include the first row of the table in the prompt to allow the LLM to understand the data before providing a final query. Here we will use this feature to let the LLM know that artists are saved with their full names."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "9a22ee47",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = SQLDatabase.from_uri(\n",
|
|
" \"sqlite:///../../../../notebooks/Chinook.db\", \n",
|
|
" include_tables=['Track'], # we include only one table to save tokens in the prompt :)\n",
|
|
" sample_row_in_table_info=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "bcb7a489",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "81e05d82",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"What are some example tracks by Bach? \n",
|
|
"SQLQuery:Table 'Track' has columns: TrackId (INTEGER), Name (NVARCHAR(200)), AlbumId (INTEGER), MediaTypeId (INTEGER), GenreId (INTEGER), Composer (NVARCHAR(220)), Milliseconds (INTEGER), Bytes (INTEGER), UnitPrice (NUMERIC(10, 2)). Here is an example row for this table (long strings are truncated): ['1', 'For Those About To Rock (We Salute You)', '1', '1', '1', 'Angus Young, Malcolm Young, Brian Johnson', '343719', '11170334', '0.99'].\n",
|
|
"\u001b[32;1m\u001b[1;3m SELECT TrackId, Name, Composer FROM Track WHERE Composer LIKE '%Bach%' ORDER BY Name LIMIT 5;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(1709, 'American Woman', 'B. Cummings/G. Peterson/M.J. Kale/R. Bachman'), (3408, 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', 'Johann Sebastian Bach'), (3433, 'Concerto No.2 in F Major, BWV1047, I. Allegro', 'Johann Sebastian Bach'), (3407, 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Johann Sebastian Bach'), (3490, 'Partita in E Major, BWV 1006A: I. Prelude', 'Johann Sebastian Bach')]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m Some example tracks by Bach are 'American Woman', 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', 'Concerto No.2 in F Major, BWV1047, I. Allegro', 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', and 'Partita in E Major, BWV 1006A: I. Prelude'.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' Some example tracks by Bach are \\'American Woman\\', \\'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria\\', \\'Concerto No.2 in F Major, BWV1047, I. Allegro\\', \\'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace\\', and \\'Partita in E Major, BWV 1006A: I. Prelude\\'.'"
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"What are some example tracks by Bach?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c12ae15a",
|
|
"metadata": {},
|
|
"source": [
|
|
"## SQLDatabaseSequentialChain\n",
|
|
"\n",
|
|
"Chain for querying SQL database that is a sequential chain.\n",
|
|
"\n",
|
|
"The chain is as follows:\n",
|
|
"\n",
|
|
" 1. Based on the query, determine which tables to use.\n",
|
|
" 2. Based on those tables, call the normal SQL database chain.\n",
|
|
"\n",
|
|
"This is useful in cases where the number of tables in the database is large."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "e59a4740",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chains import SQLDatabaseSequentialChain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "58bb49b6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"chain = SQLDatabaseSequentialChain.from_llm(llm, db, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "95017b1a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseSequentialChain chain...\u001b[0m\n",
|
|
"Table names to use:\n",
|
|
"\u001b[33;1m\u001b[1;3m['Customer', 'Employee']\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are also customers? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Customer c INNER JOIN Employee e ON c.SupportRepId = e.EmployeeId;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(59,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 59 employees who are also customers.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 59 employees who are also customers.'"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"chain.run(\"How many employees are also customers?\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|