forked from Archives/langchain
Add example for question answering over documents with OpenAI Function Agent (#6448)
This PR adds an example of doing question answering over documents using OpenAI Function Agents. #### Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
68a675cc68
commit
d4e8e0f5ab
@ -0,0 +1,183 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ec1d7a9a",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Document Comparison\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook shows how to use an agent to compare two documents.\n",
|
||||||
|
"\n",
|
||||||
"The high level idea is we will create a question-answering chain for each document, and then give those chains to an agent as tools so it can query each document and compare the answers."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "8632a37c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from pydantic import BaseModel, Field\n",
|
||||||
|
"\n",
|
||||||
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
|
"from langchain.agents import Tool\n",
|
||||||
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||||
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||||
|
"from langchain.vectorstores import FAISS\n",
|
||||||
|
"from langchain.document_loaders import PyPDFLoader\n",
|
||||||
|
"from langchain.chains import RetrievalQA"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "64f19917",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"class DocumentInput(BaseModel):\n",
"    # Argument schema shared by every document tool: a single question string.\n",
"    # (Kept as a comment rather than a docstring so the generated schema is unchanged.)\n",
"    question: str = Field()\n",
"\n",
"\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"# NOTE: the paths below are absolute paths on the author's machine; point them at\n",
"# your own copies of the PDFs (the comment above each entry is the download URL).\n",
"files = [\n",
"    # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
"    {\n",
"        \"name\": \"alphabet-earnings\",\n",
"        \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
"    },\n",
"    # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
"    {\n",
"        \"name\": \"tesla-earnings\",\n",
"        \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\",\n",
"    },\n",
"]\n",
"\n",
"# Neither the splitter nor the embedding client depends on the file being\n",
"# processed, so build them once instead of once per loop iteration.\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"embeddings = OpenAIEmbeddings()\n",
"\n",
"tools = []\n",
"for file in files:\n",
"    # Load the PDF, chunk it, and index the chunks in an in-memory FAISS store\n",
"    loader = PyPDFLoader(file[\"path\"])\n",
"    pages = loader.load_and_split()\n",
"    docs = text_splitter.split_documents(pages)\n",
"    retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
"\n",
"    # Expose a RetrievalQA chain over this document's retriever as an agent tool,\n",
"    # named after the document so the agent can pick the right one\n",
"    tools.append(\n",
"        Tool(\n",
"            args_schema=DocumentInput,\n",
"            name=file[\"name\"],\n",
"            description=f\"useful when you want to answer questions about {file['name']}\",\n",
"            func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),\n",
"        )\n",
"    )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "eca02549",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.agents import initialize_agent\n",
|
||||||
|
"from langchain.agents import AgentType"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "c4d56c25",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||||
|
"\u001b[32;1m\u001b[1;3m\n",
|
||||||
|
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||||
|
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'input': 'did alphabet or tesla have more revenue?',\n",
|
||||||
|
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"# Chat model that drives the agent (temperature 0)\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"# OpenAI-functions agent that is given the document tools built earlier\n",
"agent = initialize_agent(\n",
"    tools=tools,\n",
"    llm=llm,\n",
"    agent=AgentType.OPENAI_FUNCTIONS,\n",
"    verbose=True,\n",
")\n",
"\n",
"# Ask a question that needs an answer from both documents\n",
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6db4c853",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user