forked from Archives/langchain
6be5f4e4c4
Co-authored-by: Bruno Bornsztein <bruno.bornsztein@gmail.com>
354 lines
9.5 KiB
Plaintext
354 lines
9.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0ed6aab1",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%% md\n"
|
|
}
|
|
},
|
|
"source": [
|
|
"# SQLite example\n",
|
|
"\n",
|
|
"This example showcases hooking up an LLM to answer questions over a database."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b2f66479",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%% md\n"
|
|
}
|
|
},
|
|
"source": [
|
|
"This uses the example Chinook database.\n",
|
|
"To set it up, follow the instructions at https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a `notebooks` folder at the root of this repository."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "d0e27d88",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain import OpenAI, SQLDatabase, SQLDatabaseChain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "72ede462",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
|
|
"llm = OpenAI(temperature=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a8fc8f23",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "15ff81df",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are there? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Employee;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(9,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 9 employees.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 9 employees.'"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"How many employees are there?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "aad2cba6",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Customize Prompt\n",
|
|
"You can also customize the prompt that is used. Here is an example that prompts the model to understand that `foobar` refers to the `Employee` table."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "8ca7bafb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.prompts.prompt import PromptTemplate\n",
|
|
"\n",
|
|
"_DEFAULT_TEMPLATE = \"\"\"Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.\n",
|
|
"Use the following format:\n",
|
|
"\n",
|
|
"Question: \"Question here\"\n",
|
|
"SQLQuery: \"SQL Query to run\"\n",
|
|
"SQLResult: \"Result of the SQLQuery\"\n",
|
|
"Answer: \"Final answer here\"\n",
|
|
"\n",
|
|
"Only use the following tables:\n",
|
|
"\n",
|
|
"{table_info}\n",
|
|
"\n",
|
|
"If someone asks for the table foobar, they really mean the employee table.\n",
|
|
"\n",
|
|
"Question: {input}\"\"\"\n",
|
|
"PROMPT = PromptTemplate(\n",
|
|
" input_variables=[\"input\", \"table_info\", \"dialect\"], template=_DEFAULT_TEMPLATE\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "ec47a2bf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, prompt=PROMPT, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "ebb0674e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are there in the foobar table? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Employee;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(9,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 9 employees in the foobar table.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 9 employees in the foobar table.'"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"How many employees are there in the foobar table?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b408f800",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Choosing how to limit the number of rows returned\n",
|
|
"If you are querying for several rows of a table, you can select the maximum number of results you want to get by using the `top_k` parameter (default is 10). This is useful for avoiding query results that exceed the prompt max length or consume tokens unnecessarily."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "6adaa799",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True, top_k=3)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "edfc8a8e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"What are some example tracks by composer Johann Sebastian Bach? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Name FROM Track WHERE Composer = 'Johann Sebastian Bach' LIMIT 3;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[('Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace',), ('Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria',), ('Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude',)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m Examples of tracks by Johann Sebastian Bach include 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', and 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude'.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' Examples of tracks by Johann Sebastian Bach include \\'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace\\', \\'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria\\', and \\'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude\\'.'"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"db_chain.run(\"What are some example tracks by composer Johann Sebastian Bach?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c12ae15a",
|
|
"metadata": {},
|
|
"source": [
|
|
"## SQLDatabaseSequentialChain\n",
|
|
"\n",
|
|
"A sequential chain for querying a SQL database.\n",
|
|
"\n",
|
|
"The chain is as follows:\n",
|
|
"\n",
|
|
" 1. Based on the query, determine which tables to use.\n",
|
|
" 2. Based on those tables, call the normal SQL database chain.\n",
|
|
"\n",
|
|
"This is useful in cases where the number of tables in the database is large."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "e59a4740",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chains import SQLDatabaseSequentialChain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "58bb49b6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"chain = SQLDatabaseSequentialChain.from_llm(llm, db, verbose=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "95017b1a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseSequentialChain chain...\u001b[0m\n",
|
|
"Table names to use:\n",
|
|
"\u001b[33;1m\u001b[1;3m['Customer', 'Employee']\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
|
"How many employees are also customers? \n",
|
|
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT COUNT(*) FROM Customer c INNER JOIN Employee e ON c.SupportRepId = e.EmployeeId;\u001b[0m\n",
|
|
"SQLResult: \u001b[33;1m\u001b[1;3m[(59,)]\u001b[0m\n",
|
|
"Answer:\u001b[32;1m\u001b[1;3m There are 59 employees who are also customers.\u001b[0m\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n",
|
|
"\n",
|
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"' There are 59 employees who are also customers.'"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"chain.run(\"How many employees are also customers?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b2998b03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|