diff --git a/docs/modules/chains/examples/sqlite.ipynb b/docs/modules/chains/examples/sqlite.ipynb index b4b621ae..257e1e4e 100644 --- a/docs/modules/chains/examples/sqlite.ipynb +++ b/docs/modules/chains/examples/sqlite.ipynb @@ -434,6 +434,131 @@ "db_chain.run(\"What are some example tracks by Bach?\")" ] }, + { + "cell_type": "markdown", + "id": "ef94e948", + "metadata": {}, + "source": [ + "### Custom Table Info\n", + "In some cases, it can be useful to provide custom table information instead of using the automatically generated table definitions and the first `sample_rows_in_table_info` sample rows. For example, if you know that the first few rows of a table are uninformative, it could help to manually provide example rows that are more diverse or provide more information to the model. It is also possible to limit the columns that will be visible to the model if there are unnecessary columns. \n", + "\n", + "This information can be provided as a dictionary with table names as the keys and table information as the values. 
For example, let's provide a custom definition and sample rows for the Track table with only a few columns:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2ad33ab1", + "metadata": {}, + "outputs": [], + "source": [ + "custom_table_info = {\n", + " \"Track\": \"\"\"CREATE TABLE Track (\n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(200) NOT NULL,\n", + "\t\"Composer\" NVARCHAR(220),\n", + "\tPRIMARY KEY (\"TrackId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Track' LIMIT 3;\n", + "TrackId Name Composer\n", + "1 For Those About To Rock (We Salute You) Angus Young, Malcolm Young, Brian Johnson\n", + "2 Balls to the Wall None\n", + "3 My favorite song ever The coolest composer of all time\"\"\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "db144352", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "CREATE TABLE \"Playlist\" (\n", + "\t\"PlaylistId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(120), \n", + "\tPRIMARY KEY (\"PlaylistId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Playlist' LIMIT 2;\n", + "PlaylistId Name\n", + "1 Music\n", + "2 Movies\n", + "\n", + "CREATE TABLE Track (\n", + "\t\"TrackId\" INTEGER NOT NULL, \n", + "\t\"Name\" NVARCHAR(200) NOT NULL,\n", + "\t\"Composer\" NVARCHAR(220),\n", + "\tPRIMARY KEY (\"TrackId\")\n", + ")\n", + "\n", + "SELECT * FROM 'Track' LIMIT 3;\n", + "TrackId Name Composer\n", + "1 For Those About To Rock (We Salute You) Angus Young, Malcolm Young, Brian Johnson\n", + "2 Balls to the Wall None\n", + "3 My favorite song ever The coolest composer of all time\n" + ] + } + ], + "source": [ + "db = SQLDatabase.from_uri(\n", + " \"sqlite:///../../../../notebooks/Chinook.db\",\n", + " include_tables=['Track', 'Playlist'],\n", + " sample_rows_in_table_info=2,\n", + " custom_table_info=custom_table_info)\n", + "\n", + "print(db.table_info)" + ] + }, + { + "cell_type": "markdown", + "id": "5fc6f507", + 
"metadata": {}, + "source": [ + "Note how our custom table definition and sample rows for `Track` override the `sample_rows_in_table_info` parameter. Tables that are not overridden by `custom_table_info`, in this example `Playlist`, will have their table info gathered automatically as usual." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dfbda4e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n", + "What are some example tracks by Bach? \n", + "SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Name, Composer FROM Track WHERE Composer LIKE '%Bach%' LIMIT 5;\u001b[0m\n", + "SQLResult: \u001b[33;1m\u001b[1;3m[('American Woman', 'B. Cummings/G. Peterson/M.J. Kale/R. Bachman'), ('Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Johann Sebastian Bach'), ('Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', 'Johann Sebastian Bach'), ('Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude', 'Johann Sebastian Bach'), ('Toccata and Fugue in D Minor, BWV 565: I. Toccata', 'Johann Sebastian Bach')]\u001b[0m\n", + "Answer:\u001b[32;1m\u001b[1;3m Some example tracks by Bach are 'American Woman', 'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria', 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude', and 'Toccata and Fugue in D Minor, BWV 565: I. Toccata'.\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "' Some example tracks by Bach are \\'American Woman\\', \\'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace\\', \\'Aria Mit 30 Veränderungen, BWV 988 \"Goldberg Variations\": Aria\\', \\'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude\\', and \\'Toccata and Fugue in D Minor, BWV 565: I. 
Toccata\\'.'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)\n", + "db_chain.run(\"What are some example tracks by Bach?\")" + ] + }, { "cell_type": "markdown", "id": "c12ae15a", @@ -542,7 +667,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 3cb96dea..c7041f6c 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -20,6 +20,7 @@ class SQLDatabase: ignore_tables: Optional[List[str]] = None, include_tables: Optional[List[str]] = None, sample_rows_in_table_info: int = 3, + custom_table_info: Optional[dict] = None, ): """Create engine from database URI.""" self._engine = engine @@ -49,6 +50,21 @@ class SQLDatabase: self._sample_rows_in_table_info = sample_rows_in_table_info + self._custom_table_info = custom_table_info + if self._custom_table_info: + if not isinstance(self._custom_table_info, dict): + raise TypeError( + "table_info must be a dictionary with table names as keys and the " + "desired table info as values" + ) + # only keep the tables that are also present in the database + intersection = set(self._custom_table_info).intersection(self._all_tables) + self._custom_table_info = dict( + (table, self._custom_table_info[table]) + for table in self._custom_table_info + if table in intersection + ) + self._metadata = metadata or MetaData() self._metadata.reflect(bind=self._engine) @@ -99,6 +115,10 @@ class SQLDatabase: tables = [] for table in meta_tables: + if self._custom_table_info and table.name in self._custom_table_info: + tables.append(self._custom_table_info[table.name]) + continue + # add create table command create_table = str(CreateTable(table).compile(self._engine))