WIP: sql research assistant (#14240)

pull/14869/head
Harrison Chase 5 months ago committed by GitHub
parent 5f839beab9
commit d2cce54bf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -176,7 +176,7 @@
"\n",
"\n",
"async def asplit_into_list(\n",
" input: AsyncIterator[str]\n",
" input: AsyncIterator[str],\n",
") -> AsyncIterator[List[str]]: # async def\n",
" buffer = \"\"\n",
" async for (\n",

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,62 @@
# sql-research-assistant
This package does research over a SQL database
## Usage
To use this package, you should first have the LangChain CLI installed:
```shell
pip install -U langchain-cli
```
To create a new LangChain project and install this as the only package, you can do:
```shell
langchain app new my-app --package sql-research-assistant
```
If you want to add this to an existing project, you can just run:
```shell
langchain app add sql-research-assistant
```
And add the following code to your `server.py` file:
```python
from sql_research_assistant import chain as sql_research_assistant_chain
add_routes(app, sql_research_assistant_chain, path="/sql-research-assistant")
```
(Optional) Let's now configure LangSmith.
LangSmith will help us trace, monitor and debug LangChain applications.
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
If you don't have access, you can skip this section
```shell
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=<your-api-key>
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
```
If you are inside this directory, then you can spin up a LangServe instance directly by:
```shell
langchain serve
```
This will start the FastAPI app with a server is running locally at
[http://localhost:8000](http://localhost:8000)
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
We can access the playground at [http://127.0.0.1:8000/sql-research-assistant/playground](http://127.0.0.1:8000/sql-research-assistant/playground)
We can access the template from code with:
```python
from langserve.client import RemoteRunnable
runnable = RemoteRunnable("http://localhost:8000/sql-research-assistant")
```

@ -0,0 +1,24 @@
[tool.poetry]
name = "sql-research-assistant"
version = "0.0.1"
description = ""
authors = []
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
openai = "^0.28.1"
[tool.poetry.group.dev.dependencies]
langchain-cli = ">=0.0.4"
fastapi = "^0.104.0"
sse-starlette = "^1.6.5"
[tool.langserve]
export_module = "sql_research_assistant"
export_attr = "chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

@ -0,0 +1,3 @@
from sql_research_assistant.chain import chain
__all__ = ["chain"]

@ -0,0 +1,22 @@
from langchain.pydantic_v1 import BaseModel
from langchain.schema.runnable import RunnablePassthrough
from sql_research_assistant.search.web import chain as search_chain
from sql_research_assistant.writer import chain as writer_chain
chain_notypes = (
RunnablePassthrough().assign(research_summary=search_chain) | writer_chain
)
class InputType(BaseModel):
question: str
chain = chain_notypes.with_types(input_type=InputType)
if __name__ == "__main__":
print(
chain.invoke({"question": "who is typically older: point guards or centers?"})
)

@ -0,0 +1,93 @@
from pathlib import Path
from langchain.chat_models import ChatOllama, ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.utilities import SQLDatabase
# Add the LLM downloaded from Ollama
ollama_llm = "llama2"
llm = ChatOllama(model=ollama_llm)
db_path = Path(__file__).parent / "nba_roster.db"
rel = db_path.relative_to(Path.cwd())
db_string = f"sqlite:///{rel}"
db = SQLDatabase.from_uri(db_string, sample_rows_in_table_info=2)
def get_schema(_):
return db.get_table_info()
def run_query(query):
return db.run(query)
# Prompt
template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}
Question: {question}
SQL Query:""" # noqa: E501
prompt = ChatPromptTemplate.from_messages(
[
("system", "Given an input question, convert it to a SQL query. No pre-amble."),
("human", template),
]
)
memory = ConversationBufferMemory(return_messages=True)
# Chain to query with memory
sql_chain = (
RunnablePassthrough.assign(
schema=get_schema,
)
| prompt
| llm.bind(stop=["\nSQLResult:"])
| StrOutputParser()
| (lambda x: x.split("\n\n")[0])
)
# Chain to answer
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
{schema}
Question: {question}
SQL Query: {query}
SQL Response: {response}""" # noqa: E501
prompt_response = ChatPromptTemplate.from_messages(
[
(
"system",
"Given an input question and SQL response, convert it to a natural "
"language answer. No pre-amble.",
),
("human", template),
]
)
# Supply the input types to the prompt
class InputType(BaseModel):
question: str
sql_answer_chain = (
RunnablePassthrough.assign(query=sql_chain).with_types(input_type=InputType)
| RunnablePassthrough.assign(
schema=get_schema,
response=lambda x: db.run(x["query"]),
)
| RunnablePassthrough.assign(
answer=prompt_response | ChatOpenAI() | StrOutputParser()
)
| (lambda x: f"Question: {x['question']}\n\nAnswer: {x['answer']}")
)

@ -0,0 +1,151 @@
import json
from typing import Any
import requests
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.messages import SystemMessage
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import (
Runnable,
RunnableLambda,
RunnableParallel,
RunnablePassthrough,
)
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from sql_research_assistant.search.sql import sql_answer_chain
RESULTS_PER_QUESTION = 3
ddg_search = DuckDuckGoSearchAPIWrapper()
def scrape_text(url: str):
# Send a GET request to the webpage
try:
response = requests.get(url)
# Check if the request was successful
if response.status_code == 200:
# Parse the content of the request with BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")
# Extract all text from the webpage
page_text = soup.get_text(separator=" ", strip=True)
# Print the extracted text
return page_text
else:
return f"Failed to retrieve the webpage: Status code {response.status_code}"
except Exception as e:
print(e)
return f"Failed to retrieve the webpage: {e}"
def web_search(query: str, num_results: int):
results = ddg_search.results(query, num_results)
return [r["link"] for r in results]
SEARCH_PROMPT = ChatPromptTemplate.from_messages(
[
("system", "{agent_prompt}"),
(
"user",
"Write 3 google search queries to search online that form an "
"objective opinion from the following: {question}\n"
"You must respond with a list of strings in the following format: "
'["query 1", "query 2", "query 3"].',
),
]
)
AUTO_AGENT_INSTRUCTIONS = """
This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
Agent
The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
examples:
task: "should I invest in apple stocks?"
response:
{
"agent": "💰 Finance Agent",
"agent_role_prompt: "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
}
task: "could reselling sneakers become profitable?"
response:
{
"agent": "📈 Business Analyst Agent",
"agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
}
task: "what are the most interesting sites in Tel Aviv?"
response:
{
"agent: "🌍 Travel Agent",
"agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
}
""" # noqa: E501
CHOOSE_AGENT_PROMPT = ChatPromptTemplate.from_messages(
[SystemMessage(content=AUTO_AGENT_INSTRUCTIONS), ("user", "task: {task}")]
)
SUMMARY_TEMPLATE = """{text}
-----------
Using the above text, answer in short the following question:
> {question}
-----------
if the question cannot be answered using the text, imply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501
SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
scrape_and_summarize: Runnable[Any, Any] = (
RunnableParallel(
{
"question": lambda x: x["question"],
"text": lambda x: scrape_text(x["url"])[:10000],
"url": lambda x: x["url"],
}
)
| RunnableParallel(
{
"summary": SUMMARY_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),
"url": lambda x: x["url"],
}
)
| RunnableLambda(lambda x: f"Source Url: {x['url']}\nSummary: {x['summary']}")
)
def load_json(s):
try:
return json.loads(s)
except Exception:
return {}
search_query = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
choose_agent = (
CHOOSE_AGENT_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
)
get_search_queries = (
RunnablePassthrough().assign(
agent_prompt=RunnableParallel({"task": lambda x: x})
| choose_agent
| (lambda x: x.get("agent_role_prompt"))
)
| search_query
)
chain = (
get_search_queries
| (lambda x: [{"question": q} for q in x])
| sql_answer_chain.map()
| (lambda x: "\n\n".join(x))
)

@ -0,0 +1,75 @@
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import ConfigurableField
WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501
# Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
The report should focus on the answer to the question, should be well structured, informative, \
in depth, with facts and numbers if available and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
You must write the report in apa format.
Please do your best, this is very important to my career.""" # noqa: E501
RESOURCE_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Based on the above information, generate a bibliography recommendation report for the following question or topic: "{question}". \
The report should provide a detailed analysis of each recommended resource, explaining how each source can contribute to finding answers to the research question. \
Focus on the relevance, reliability, and significance of each source. \
Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax. \
Include relevant facts, figures, and numbers whenever available. \
The report should have a minimum length of 1,200 words.
Please do your best, this is very important to my career.""" # noqa: E501
OUTLINE_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, generate an outline for a research report in Markdown syntax for the following question or topic: "{question}". \
The outline should provide a well-structured framework for the research report, including the main sections, subsections, and key points to be covered. \
The research report should be detailed, informative, in-depth, and a minimum of 1,200 words. \
Use appropriate Markdown syntax to format the outline and ensure readability.
Please do your best, this is very important to my career.""" # noqa: E501
model = ChatOpenAI(temperature=0)
prompt = ChatPromptTemplate.from_messages(
[
("system", WRITER_SYSTEM_PROMPT),
("user", RESEARCH_REPORT_TEMPLATE),
]
).configurable_alternatives(
ConfigurableField("report_type"),
default_key="research_report",
resource_report=ChatPromptTemplate.from_messages(
[
("system", WRITER_SYSTEM_PROMPT),
("user", RESOURCE_REPORT_TEMPLATE),
]
),
outline_report=ChatPromptTemplate.from_messages(
[
("system", WRITER_SYSTEM_PROMPT),
("user", OUTLINE_REPORT_TEMPLATE),
]
),
)
chain = prompt | model | StrOutputParser()
Loading…
Cancel
Save