{
"cells": [
{
"cell_type": "markdown",
"id": "71a43144",
"metadata": {},
"source": [
"# Structure answers with OpenAI functions\n",
"\n",
"OpenAI functions allows for structuring of response output. This is often useful in question answering when you want to not only get the final answer but also supporting evidence, citations, etc.\n",
"\n",
"In this notebook we show how to use an LLM chain which uses OpenAI functions as part of an overall retrieval pipeline."
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f059012e",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "f10b831c",
"metadata": {},
"outputs": [],
"source": [
"loader = TextLoader(\"../../state_of_the_union.txt\", encoding=\"utf-8\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"for i, text in enumerate(texts):\n",
" text.metadata[\"source\"] = f\"{i}-pl\"\n",
"embeddings = OpenAIEmbeddings()\n",
"docsearch = Chroma.from_documents(texts, embeddings)"
]
},
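{
"cell_type": "markdown",
"id": "a1f20c3d",
"metadata": {},
"source": [
"As an optional sanity check, we can query the retriever directly and confirm that each returned chunk carries the `{i}-pl` source tag we just attached. The query string and the `k` value below are only illustrative."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2e41d5f",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: pull a couple of chunks and inspect their source tags.\n",
"# The query string and k=2 are illustrative choices.\n",
"retriever_check = docsearch.as_retriever(search_kwargs={\"k\": 2})\n",
"for doc in retriever_check.get_relevant_documents(\"What did the president say about Russia?\"):\n",
"    print(doc.metadata[\"source\"], doc.page_content[:80])"
]
},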
{
"cell_type": "code",
"execution_count": 27,
"id": "70f3a38c",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import create_qa_with_sources_chain\n",
"from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain_openai import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "7b3e1731",
"metadata": {},
"outputs": [],
"source": [
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "70a9ccff",
"metadata": {},
"outputs": [],
"source": [
"qa_chain = create_qa_with_sources_chain(llm)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "efcdb6fb",
"metadata": {},
"outputs": [],
"source": [
"doc_prompt = PromptTemplate(\n",
" template=\"Content: {page_content}\\nSource: {source}\",\n",
" input_variables=[\"page_content\", \"source\"],\n",
")"
]
},
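{
"cell_type": "markdown",
"id": "c4d52e6a",
"metadata": {},
"source": [
"To see how each retrieved document will be rendered before it is stuffed into the prompt, we can format `doc_prompt` by hand. The values passed in below are made up purely for illustration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5e63f7b",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: render one document the way the combine-documents chain will.\n",
"print(doc_prompt.format(page_content=\"(example document text)\", source=\"0-pl\"))"
]
},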
{
"cell_type": "code",
"execution_count": 31,
"id": "64a08263",
"metadata": {},
"outputs": [],
"source": [
"final_qa_chain = StuffDocumentsChain(\n",
" llm_chain=qa_chain,\n",
" document_variable_name=\"context\",\n",
" document_prompt=doc_prompt,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "cb876c97",
"metadata": {},
"outputs": [],
"source": [
"retrieval_qa = RetrievalQA(\n",
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "a75bad9b",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about russia\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "9a60f109",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\\n \"answer\": \"The President expressed strong condemnation of Russia\\'s actions in Ukraine and announced measures to isolate Russia and provide support to Ukraine. He stated that Russia\\'s invasion of Ukraine will have long-term consequences for Russia and emphasized the commitment to defend NATO countries. The President also mentioned taking robust action through sanctions and releasing oil reserves to mitigate gas prices. Overall, the President conveyed a message of solidarity with Ukraine and determination to protect American interests.\",\\n \"sources\": [\"0-pl\", \"4-pl\", \"5-pl\", \"6-pl\"]\\n}'"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retrieval_qa.run(query)"
]
},
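{
"cell_type": "markdown",
"id": "e6f74a8c",
"metadata": {},
"source": [
"The chain returns its answer as a JSON-formatted string with `answer` and `sources` keys (as shown above), so if you need the fields programmatically you can parse it with the standard library. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7a85b9d",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Parse the JSON string returned by the chain into a dict.\n",
"parsed = json.loads(retrieval_qa.run(query))\n",
"print(parsed[\"answer\"])\n",
"print(parsed[\"sources\"])"
]
},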
{
"cell_type": "markdown",
"id": "a60f93a4",
"metadata": {},
"source": [
"## Using Pydantic\n",
"\n",
"If we want to, we can set the chain to return in Pydantic. Note that if downstream chains consume the output of this chain - including memory - they will generally expect it to be in string format, so you should only use this chain when it is the final chain."
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "3559727f",
"metadata": {},
"outputs": [],
"source": [
"qa_chain_pydantic = create_qa_with_sources_chain(llm, output_parser=\"pydantic\")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "5a7997d1",
"metadata": {},
"outputs": [],
"source": [
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
" llm_chain=qa_chain_pydantic,\n",
" document_variable_name=\"context\",\n",
" document_prompt=doc_prompt,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "79368e40",
"metadata": {},
"outputs": [],
"source": [
"retrieval_qa_pydantic = RetrievalQA(\n",
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "6b8641de",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AnswerWithSources(answer=\"The President expressed strong condemnation of Russia's actions in Ukraine and announced measures to isolate Russia and provide support to Ukraine. He stated that Russia's invasion of Ukraine will have long-term consequences for Russia and emphasized the commitment to defend NATO countries. The President also mentioned taking robust action through sanctions and releasing oil reserves to mitigate gas prices. Overall, the President conveyed a message of solidarity with Ukraine and determination to protect American interests.\", sources=['0-pl', '4-pl', '5-pl', '6-pl'])"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retrieval_qa_pydantic.run(query)"
]
},
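{
"cell_type": "markdown",
"id": "a8b96c0e",
"metadata": {},
"source": [
"Because the chain now returns an `AnswerWithSources` Pydantic object (as shown above), the fields can be read directly instead of being parsed out of a string:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9c07d1f",
"metadata": {},
"outputs": [],
"source": [
"# Access the structured fields directly on the returned Pydantic object.\n",
"structured = retrieval_qa_pydantic.run(query)\n",
"print(structured.answer)\n",
"print(structured.sources)"
]
},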
{
"cell_type": "markdown",
"id": "e4c15395",
"metadata": {},
"source": [
"## Using in ConversationalRetrievalChain\n",
"\n",
"We can also show what it's like to use this in the ConversationalRetrievalChain. Note that because this chain involves memory, we will NOT use the Pydantic return type."
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "18e5f090",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import ConversationalRetrievalChain, LLMChain\n",
"from langchain.memory import ConversationBufferMemory\n",
"\n",
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
"Make sure to avoid using any unclear pronouns.\n",
"\n",
"Chat History:\n",
"{chat_history}\n",
"Follow Up Input: {question}\n",
"Standalone question:\"\"\"\n",
"CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)\n",
"condense_question_chain = LLMChain(\n",
" llm=llm,\n",
" prompt=CONDENSE_QUESTION_PROMPT,\n",
")"
]
},
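{
"cell_type": "markdown",
"id": "c0d18e2a",
"metadata": {},
"source": [
"Before wiring the question generator into the ConversationalRetrievalChain, you can run it on its own to see how a follow-up question gets rewritten into a standalone question. The chat history string below is a made-up example."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1e29f3b",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative standalone call; the chat_history string is a made-up example.\n",
"condense_question_chain.run(\n",
"    chat_history=\"Human: What did the president say about Ketanji Brown Jackson?\\nAI: He nominated her to the Supreme Court.\",\n",
"    question=\"What did he say about her predecessor?\",\n",
")"
]
},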
{
"cell_type": "code",
"execution_count": 40,
"id": "975c3c2b",
"metadata": {},
"outputs": [],
"source": [
"qa = ConversationalRetrievalChain(\n",
" question_generator=condense_question_chain,\n",
" retriever=docsearch.as_retriever(),\n",
" memory=memory,\n",
" combine_docs_chain=final_qa_chain,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "784aee3a",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"result = qa({\"question\": query})"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "dfd0ccc1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'What did the president say about Ketanji Brown Jackson',\n",
" 'chat_history': [HumanMessage(content='What did the president say about Ketanji Brown Jackson', additional_kwargs={}, example=False),\n",
" AIMessage(content='{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False)],\n",
" 'answer': '{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}'}"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "c93f805b",
"metadata": {},
"outputs": [],
"source": [
"query = \"what did he say about her predecessor?\"\n",
"result = qa({\"question\": query})"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "5d8612c0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'what did he say about her predecessor?',\n",
" 'chat_history': [HumanMessage(content='What did the president say about Ketanji Brown Jackson', additional_kwargs={}, example=False),\n",
" AIMessage(content='{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False),\n",
" HumanMessage(content='what did he say about her predecessor?', additional_kwargs={}, example=False),\n",
" AIMessage(content='{\\n \"answer\": \"The President honored Justice Stephen Breyer for his service as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False)],\n",
" 'answer': '{\\n \"answer\": \"The President honored Justice Stephen Breyer for his service as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.\",\\n \"sources\": [\"31-pl\"]\\n}'}"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
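{
"cell_type": "markdown",
"id": "e2f30a4c",
"metadata": {},
"source": [
"At this point the memory holds both exchanges, with each answer stored as a plain JSON string (which is why we avoided the Pydantic return type here). We can inspect what it has accumulated:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3a41b5d",
"metadata": {},
"outputs": [],
"source": [
"# Inspect the messages the conversation memory has accumulated so far.\n",
"memory.load_memory_variables({})[\"chat_history\"]"
]
},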
{
"cell_type": "markdown",
"id": "ac9e4626",
"metadata": {},
"source": [
"## Using your own output schema\n",
"\n",
"We can change the outputs of our chain by passing in our own schema. The values and descriptions of this schema will inform the function we pass to the OpenAI API, meaning it won't just affect how we parse outputs but will also change the OpenAI output itself. For example we can add a `countries_referenced` parameter to our schema and describe what we want this parameter to mean, and that'll cause the OpenAI output to include a description of a speaker in the response.\n",
"\n",
"In addition to the previous example, we can also add a custom prompt to the chain. This will allow you to add additional context to the response, which can be useful for question answering."
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "f34a48f8",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain.chains.openai_functions import create_qa_with_structure_chain\n",
"from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from pydantic import BaseModel, Field"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "5647c161",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CustomResponseSchema(answer=\"He announced that American airspace will be closed off to all Russian flights, further isolating Russia and adding an additional squeeze on their economy. The Ruble has lost 30% of its value and the Russian stock market has lost 40% of its value. He also mentioned that Putin alone is to blame for Russia's reeling economy. The United States and its allies are providing support to Ukraine in their fight for freedom, including military, economic, and humanitarian assistance. The United States is giving more than $1 billion in direct assistance to Ukraine. He made it clear that American forces are not engaged and will not engage in conflict with Russian forces in Ukraine, but they are deployed to defend NATO allies in case Putin decides to keep moving west. He also mentioned that Putin's attack on Ukraine was premeditated and unprovoked, and that the West and NATO responded by building a coalition of freedom-loving nations to confront Putin. The free world is holding Putin accountable through powerful economic sanctions, cutting off Russia's largest banks from the international financial system, and preventing Russia's central bank from defending the Russian Ruble. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs.\", countries_referenced=['AMERICA', 'RUSSIA', 'UKRAINE'], sources=['4-pl', '5-pl', '2-pl', '3-pl'])"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class CustomResponseSchema(BaseModel):\n",
" \"\"\"An answer to the question being asked, with sources.\"\"\"\n",
"\n",
" answer: str = Field(..., description=\"Answer to the question that was asked\")\n",
" countries_referenced: List[str] = Field(\n",
" ..., description=\"All of the countries mentioned in the sources\"\n",
" )\n",
" sources: List[str] = Field(\n",
" ..., description=\"List of sources used to answer the question\"\n",
" )\n",
"\n",
"\n",
"prompt_messages = [\n",
" SystemMessage(\n",
" content=(\n",
" \"You are a world class algorithm to answer \"\n",
" \"questions in a specific format.\"\n",
" )\n",
" ),\n",
" HumanMessage(content=\"Answer question using the following context\"),\n",
" HumanMessagePromptTemplate.from_template(\"{context}\"),\n",
" HumanMessagePromptTemplate.from_template(\"Question: {question}\"),\n",
" HumanMessage(\n",
" content=\"Tips: Make sure to answer in the correct format. Return all of the countries mentioned in the sources in uppercase characters.\"\n",
" ),\n",
"]\n",
"\n",
"chain_prompt = ChatPromptTemplate(messages=prompt_messages)\n",
"\n",
"qa_chain_pydantic = create_qa_with_structure_chain(\n",
" llm, CustomResponseSchema, output_parser=\"pydantic\", prompt=chain_prompt\n",
")\n",
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
" llm_chain=qa_chain_pydantic,\n",
" document_variable_name=\"context\",\n",
" document_prompt=doc_prompt,\n",
")\n",
"retrieval_qa_pydantic = RetrievalQA(\n",
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
")\n",
"query = \"What did he say about russia\"\n",
"retrieval_qa_pydantic.run(query)"
]
}
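,
{
"cell_type": "markdown",
"id": "a4b52c6e",
"metadata": {},
"source": [
"As with the earlier Pydantic example, the returned `CustomResponseSchema` object exposes its fields directly, including the custom `countries_referenced` field:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5c63d7f",
"metadata": {},
"outputs": [],
"source": [
"# Access the custom field on the structured response object.\n",
"response = retrieval_qa_pydantic.run(query)\n",
"print(response.countries_referenced)\n",
"print(response.sources)"
]
}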
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}