mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
0a1b1806e9
We're seeing a lot of issues in Discord in which the LLM does not use the correct row-limiting clause for different SQL dialects, e.g., it uses `LIMIT` for MSSQL instead of `TOP`, or for Oracle instead of `ROWNUM`, etc. I think this could be because we specify the `LIMIT` statement in the example-rows portion of `table_info`, so the LLM sees the `LIMIT` statement used in the prompt. Since we can't specify each dialect's method here, I think it's fine to just replace the `SELECT... LIMIT 3;` statement with `3 rows from table_name table:`, and wrap everything in a block comment directly following the `CREATE` statement. The Rajkumar et al. paper wrapped the example rows and `SELECT` statement in a block comment as well anyway. Thoughts, @fpingham?
78 lines
2.0 KiB
Python
78 lines
2.0 KiB
Python
# flake8: noqa
|
|
"""Test SQL database wrapper with schema support.
|
|
|
|
DuckDB is used because SQLite does not support schemas.
|
|
"""
|
|
|
|
from sqlalchemy import (
|
|
Column,
|
|
Integer,
|
|
MetaData,
|
|
Sequence,
|
|
String,
|
|
Table,
|
|
create_engine,
|
|
event,
|
|
insert,
|
|
schema,
|
|
)
|
|
|
|
from langchain.sql_database import SQLDatabase
|
|
|
|
# Shared MetaData collection that both fixture tables below are attached to.
metadata_obj = MetaData()

# Attach CREATE SCHEMA DDL for each fixture schema to the metadata's
# "before_create" event, so the schemas are requested as part of
# ``metadata_obj.create_all(...)`` rather than by each test separately.
event.listen(metadata_obj, "before_create", schema.CreateSchema("schema_a"))
event.listen(metadata_obj, "before_create", schema.CreateSchema("schema_b"))
|
|
|
|
# Fixture table ``schema_a.user``: an integer primary key backed by the
# ``user_id_seq`` sequence, plus a required name column.
user = Table(
    "user",
    metadata_obj,
    Column("user_id", Integer, Sequence("user_id_seq"), primary_key=True),
    Column("user_name", String, nullable=False),
    schema="schema_a",
)
|
|
|
|
# Fixture table ``schema_b.company``: an integer primary key backed by the
# ``company_id_seq`` sequence, plus a required location column.
company = Table(
    "company",
    metadata_obj,
    Column("company_id", Integer, Sequence("company_id_seq"), primary_key=True),
    Column("company_location", String, nullable=False),
    schema="schema_b",
)
|
|
|
|
|
|
def test_table_info() -> None:
    """Test that table info is constructed properly.

    Creates the fixture schemas and tables in a fresh in-memory DuckDB
    database, wraps the engine in ``SQLDatabase`` restricted to ``schema_a``,
    and checks that ``table_info`` contains the ``CREATE TABLE`` statement for
    ``user`` followed by a block comment holding the sample-row header.
    """
    engine = create_engine("duckdb:///:memory:")
    metadata_obj.create_all(engine)

    db = SQLDatabase(engine, schema="schema_a", metadata=metadata_obj)
    output = db.table_info
    expected_output = """
    CREATE TABLE schema_a."user" (
        user_id INTEGER NOT NULL,
        user_name VARCHAR NOT NULL,
        PRIMARY KEY (user_id)
    )
    /*
    3 rows from user table:
    user_id user_name
    */
    """

    # Collapse every run of whitespace to a single space and compare the two
    # texts directly. Wrapping the strings in sorted() would compare only
    # their character multisets, so scrambled or reordered output would still
    # pass; a direct comparison also yields a readable diff on failure.
    assert " ".join(output.split()) == " ".join(expected_output.split())
|
|
|
|
|
|
def test_sql_database_run() -> None:
    """Test that commands can be run successfully and returned in correct format.

    Inserts one ``user`` row into a fresh in-memory DuckDB database, then runs
    a ``SELECT`` through ``SQLDatabase.run`` and checks the string it returns.
    """
    engine = create_engine("duckdb:///:memory:")
    metadata_obj.create_all(engine)

    # Insert the single row that the query below looks up.
    seed_row = insert(user).values(user_id=13, user_name="Harrison")
    with engine.begin() as connection:
        connection.execute(seed_row)

    # The query leaves "user" unqualified; presumably SQLDatabase(schema=...)
    # makes schema_a the default for it -- confirm against SQLDatabase.
    database = SQLDatabase(engine, schema="schema_a")
    result = database.run('select user_name from "user" where user_id = 13')

    assert result == "[('Harrison',)]"
|