diff --git a/docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb b/docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb new file mode 100644 index 00000000..26aa8325 --- /dev/null +++ b/docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ec1d7a9a", + "metadata": {}, + "source": [ + "# Document Comparison\n", + "\n", + "This notebook shows how to use an agent to compare two documents.\n", + "\n", + "The high-level idea is that we will create a question-answering chain for each document, and then give those chains to an agent as tools so it can answer questions that compare the documents." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8632a37c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. 
It's recommended that you update to the latest version using `pip install -U deeplake`.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from pydantic import BaseModel, Field\n", + "\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.agents import Tool\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.document_loaders import PyPDFLoader\n", + "from langchain.chains import RetrievalQA" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "64f19917", + "metadata": {}, + "outputs": [], + "source": [ + "class DocumentInput(BaseModel):\n", + " question: str = Field()\n", + "\n", + "\n", + "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n", + "\n", + "tools = []\n", + "files = [\n", + " # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n", + " {\n", + " \"name\": \"alphabet-earnings\", \n", + " \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n", + " }, \n", + " # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n", + " {\n", + " \"name\": \"tesla-earnings\", \n", + " \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n", + " }\n", + "]\n", + "\n", + "for file in files:\n", + " loader = PyPDFLoader(file[\"path\"])\n", + " pages = loader.load_and_split()\n", + " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + " docs = text_splitter.split_documents(pages)\n", + " embeddings = OpenAIEmbeddings()\n", + " retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n", + " \n", + " # Wrap a RetrievalQA chain built on this document's retriever in a Tool\n", + " tools.append(\n", + " Tool(\n", + " args_schema=DocumentInput,\n", + " name=file[\"name\"], \n", + " description=f\"useful when you want to answer questions about {file['name']}\",\n", + " 
func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "eca02549", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import initialize_agent\n", + "from langchain.agents import AgentType" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c4d56c25", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'did alphabet or tesla have more revenue?',\n", + " 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. 
had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm = ChatOpenAI(\n", + " temperature=0,\n", + " model=\"gpt-3.5-turbo-0613\", \n", + ")\n", + "\n", + "agent = initialize_agent(\n", + " agent=AgentType.OPENAI_FUNCTIONS,\n", + " tools=tools,\n", + " llm=llm,\n", + " verbose=True,\n", + ")\n", + "\n", + "agent({\"input\": \"did alphabet or tesla have more revenue?\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6db4c853", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}