Harrison/new api chain (#623)

Co-authored-by: Francisco Ingham <fpingham@gmail.com> Co-authored-by: lesscomfortable <pancho_ingham@hotmail.com>
1 year ago · 3d43906572
parent 1c71fadfdc
commit 3d43906572
6 changed files with 263 additions and 6 deletions
--- a/docs/modules/chains/examples/api.ipynb
+++ b/docs/modules/chains/examples/api.ipynb
--- a/docs/modules/chains/examples/sqlite.ipynb
+++ b/docs/modules/chains/examples/sqlite.ipynb
@ -53,7 +53,16 @@
   "outputs": [],
   "source": [
    "db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
-    "llm = OpenAI(temperature=0)\n",
+    "llm = OpenAI(temperature=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a8fc8f23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
    "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)"
   ]
  },
@ -179,6 +188,60 @@
    "db_chain.run(\"How many employees are there in the foobar table?\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "b408f800",
+   "metadata": {},
+   "source": [
+    "## Choosing how to limit the number of rows returned\n",
+    "If you are querying for several rows of a table you can select the maximum number of results you want to get by using the 'top_k' parameter (default is 5). This is useful for avoiding query results that exceed the prompt max length or consume tokens unnecessarily."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6adaa799",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True, top_k=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "edfc8a8e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
+      "What are some example tracks by composer Johann Sebastian Bach? \n",
+      "SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Name FROM Track WHERE Composer = 'Johann Sebastian Bach' LIMIT 3;\u001b[0m\n",
+      "SQLResult: \u001b[33;1m\u001b[1;3m[('Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace',), ('Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria',), ('Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude',)]\u001b[0m\n",
+      "Answer:\u001b[32;1m\u001b[1;3m Examples of tracks by Johann Sebastian Bach include 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', and 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude'.\u001b[0m\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "' Examples of tracks by Johann Sebastian Bach include \\'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace\\', \\'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria\\', and \\'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude\\'.'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "db_chain.run(\"What are some example tracks by composer Johann Sebastian Bach?\")"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "c12ae15a",
--- a/docs/modules/chains/utility_how_to.rst
+++ b/docs/modules/chains/utility_how_to.rst
@ -24,6 +24,12 @@ The examples here are all end-to-end chains for specific applications, focused o
 - **Notes**: This chain takes user input (a question), uses a first LLM chain to construct a SQL query to run against the SQL database, and then uses another LLMChain to take the results of that query and use it to answer the original question.
 - `Example Notebook <./examples/sqlite.html>`_

+**API Chain**
+
+- **Links Used**: LLMChain, Requests
+- **Notes**: This chain first uses an LLM to construct the url to hit, then makes that request with the Requests wrapper, and finally runs that result through the language model again in order to produce a natural language response.
+- `Example Notebook <./examples/api.html>`_
+
 **LLMBash Chain**

 - **Links Used**: BashProcess, LLMChain
--- a/langchain/chains/api/prompt.py
+++ b/langchain/chains/api/prompt.py
@ -2,12 +2,13 @@
 from langchain.prompts.prompt import PromptTemplate

 API_URL_PROMPT_TEMPLATE = """You are given the below API Documentation:
-
 {api_docs}
+Using this documentation, generate the full API url to call for answering the user question.
+You should build the API url so as to get a response that is as short as possible, while still getting the necessary information to answer the question. Pay attention in the following example, how there are many pieces of information that are deliberately not included in the API call. You should do the same.

-Using this documentation, generate the full API url to call for answering this question: {question}
+Question:{question}
+API url:"""

-API url: """
 API_URL_PROMPT = PromptTemplate(
    input_variables=[
        "api_docs",
--- a/langchain/chains/sql_database/base.py
+++ b/langchain/chains/sql_database/base.py
@ -30,6 +30,8 @@ class SQLDatabaseChain(Chain, BaseModel):
    """SQL Database to connect to."""
    prompt: BasePromptTemplate = PROMPT
    """Prompt to use to translate natural language to SQL."""
+    top_k: int = 5
+    """Number of results to return from the query"""
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

@ -65,10 +67,12 @@ class SQLDatabaseChain(Chain, BaseModel):
        table_info = self.database.get_table_info(table_names=table_names_to_use)
        llm_inputs = {
            "input": input_text,
+            "top_k": self.top_k,
            "dialect": self.database.dialect,
            "table_info": table_info,
            "stop": ["\nSQLResult:"],
        }
+
        sql_cmd = llm_chain.predict(**llm_inputs)
        if self.verbose:
            self.callback_manager.on_text(sql_cmd, color="green")
--- a/langchain/chains/sql_database/prompt.py
+++ b/langchain/chains/sql_database/prompt.py
@ -2,7 +2,7 @@
 from langchain.prompts.base import CommaSeparatedListOutputParser
 from langchain.prompts.prompt import PromptTemplate

-_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
+_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results using the LIMIT clause. You can order the results by a relevant column to return the most interesting examples in the database.
 Use the following format:

 Question: "Question here"
@ -15,8 +15,10 @@ Only use the following tables:
 {table_info}

 Question: {input}"""
+
 PROMPT = PromptTemplate(
-    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
+    input_variables=["input", "table_info", "dialect", "top_k"],
+    template=_DEFAULT_TEMPLATE,
 )

 _DECIDER_TEMPLATE = """Given the below input question and list of potential tables, output a comma separated list of the table names that may be neccessary to answer this question.