forked from Archives/langchain
Add example for question answering over documents with OpenAI Function Agent (#6448)
This PR adds an example of doing question answering over documents using OpenAI Function Agents. #### Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
68a675cc68
commit
d4e8e0f5ab
@ -0,0 +1,183 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec1d7a9a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Document Comparison\n",
|
||||
"\n",
|
||||
"This notebook shows how to use an agent to compare two documents.\n",
|
||||
"\n",
|
||||
"The high level idea is we will create a question-answering chain for each document, and then use that "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8632a37c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import Tool\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.document_loaders import PyPDFLoader\n",
|
||||
"from langchain.chains import RetrievalQA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "64f19917",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class DocumentInput(BaseModel):\n",
|
||||
" question: str = Field()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"\n",
|
||||
"tools = []\n",
|
||||
"files = [\n",
|
||||
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
|
||||
" {\n",
|
||||
" \"name\": \"alphabet-earnings\", \n",
|
||||
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
|
||||
" }, \n",
|
||||
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
|
||||
" {\n",
|
||||
" \"name\": \"tesla-earnings\", \n",
|
||||
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for file in files:\n",
|
||||
" loader = PyPDFLoader(file[\"path\"])\n",
|
||||
" pages = loader.load_and_split()\n",
|
||||
" text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
" docs = text_splitter.split_documents(pages)\n",
|
||||
" embeddings = OpenAIEmbeddings()\n",
|
||||
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
|
||||
" \n",
|
||||
" # Wrap retrievers in a Tool\n",
|
||||
" tools.append(\n",
|
||||
" Tool(\n",
|
||||
" args_schema=DocumentInput,\n",
|
||||
" name=file[\"name\"], \n",
|
||||
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
|
||||
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
|
||||
" )\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "eca02549",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c4d56c25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'did alphabet or tesla have more revenue?',\n",
|
||||
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(\n",
|
||||
" temperature=0,\n",
|
||||
" model=\"gpt-3.5-turbo-0613\", \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6db4c853",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user