mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
444 lines
15 KiB
Plaintext
444 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "71a43144",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Retrieval QA using OpenAI functions\n",
|
|
"\n",
|
|
"OpenAI functions allow for structuring of response output. This is often useful in question answering when you want to get not only the final answer but also supporting evidence, citations, etc.\n",
|
|
"\n",
|
|
"In this notebook we show how to use an LLM chain which uses OpenAI functions as part of an overall retrieval pipeline."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "f059012e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chains import RetrievalQA\n",
|
|
"from langchain.document_loaders import TextLoader\n",
|
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
|
"from langchain.vectorstores import Chroma"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "f10b831c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Using embedded DuckDB without persistence: data will be transient\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"loader = TextLoader(\"../../state_of_the_union.txt\")\n",
|
|
"documents = loader.load()\n",
|
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
|
"texts = text_splitter.split_documents(documents)\n",
|
|
"for i, text in enumerate(texts):\n",
|
|
" text.metadata['source'] = f\"{i}-pl\"\n",
|
|
"embeddings = OpenAIEmbeddings()\n",
|
|
"docsearch = Chroma.from_documents(texts, embeddings)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "70f3a38c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chat_models import ChatOpenAI\n",
|
|
"from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
|
|
"from langchain.prompts import PromptTemplate\n",
|
|
"from langchain.chains import create_qa_with_sources_chain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "7b3e1731",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "70a9ccff",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"qa_chain = create_qa_with_sources_chain(llm)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "efcdb6fb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"doc_prompt = PromptTemplate(\n",
|
|
" template=\"Content: {page_content}\\nSource: {source}\",\n",
|
|
" input_variables=[\"page_content\", \"source\"],\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "64a08263",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"final_qa_chain = StuffDocumentsChain(\n",
|
|
" llm_chain=qa_chain, \n",
|
|
" document_variable_name='context',\n",
|
|
" document_prompt=doc_prompt,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "cb876c97",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"retrieval_qa = RetrievalQA(\n",
|
|
" retriever=docsearch.as_retriever(),\n",
|
|
" combine_documents_chain=final_qa_chain\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "a75bad9b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = \"What did the president say about russia\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "9a60f109",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'{\\n \"answer\": \"The President expressed strong condemnation of Russia\\'s actions in Ukraine and announced measures to isolate Russia and provide support to Ukraine. He stated that Russia\\'s invasion of Ukraine will have long-term consequences for Russia and emphasized the commitment of the United States and its allies to defend NATO countries. The President also mentioned the imposition of sanctions on Russia and the release of oil reserves to help mitigate gas prices. Overall, the President\\'s message conveyed a firm stance against Russia\\'s aggression and a commitment to supporting Ukraine and protecting American interests.\",\\n \"sources\": [\"0-pl\", \"4-pl\", \"5-pl\", \"6-pl\"]\\n}'"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"retrieval_qa.run(query)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a60f93a4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using Pydantic\n",
|
|
"\n",
|
|
"If we want to, we can configure the chain to return a Pydantic object instead of a JSON string. Note that if downstream chains consume the output of this chain - including memory - they will generally expect it to be in string format, so you should only use this chain when it is the final chain."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "3559727f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"qa_chain_pydantic = create_qa_with_sources_chain(llm, output_parser=\"pydantic\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "5a7997d1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
|
" llm_chain=qa_chain_pydantic, \n",
|
|
" document_variable_name='context',\n",
|
|
" document_prompt=doc_prompt,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "79368e40",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"retrieval_qa_pydantic = RetrievalQA(\n",
|
|
" retriever=docsearch.as_retriever(),\n",
|
|
" combine_documents_chain=final_qa_chain_pydantic\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "6b8641de",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"AnswerWithSources(answer=\"The President expressed strong condemnation of Russia's actions in Ukraine and announced measures to isolate Russia and provide support to Ukraine. He stated that Russia's invasion of Ukraine will have long-term consequences for Russia and emphasized the commitment of the United States and its allies to defend NATO countries. The President also mentioned the imposition of sanctions on Russia and the release of oil reserves to help mitigate gas prices. Overall, the President's message conveyed a firm stance against Russia's aggression and support for Ukraine.\", sources=['0-pl', '4-pl', '5-pl', '6-pl'])"
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"retrieval_qa_pydantic.run(query)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e4c15395",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using in ConversationalRetrievalChain\n",
|
|
"\n",
|
|
"We can also show what it's like to use this in the ConversationalRetrievalChain. Note that because this chain involves memory, we will NOT use the Pydantic return type."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "18e5f090",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chains import ConversationalRetrievalChain\n",
|
|
"from langchain.memory import ConversationBufferMemory\n",
|
|
"from langchain.chains import LLMChain\n",
|
|
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
|
|
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
|
|
"Make sure to avoid using any unclear pronouns.\n",
|
|
"\n",
|
|
"Chat History:\n",
|
|
"{chat_history}\n",
|
|
"Follow Up Input: {question}\n",
|
|
"Standalone question:\"\"\"\n",
|
|
"CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)\n",
|
|
"condense_question_chain = LLMChain(\n",
|
|
" llm=llm,\n",
|
|
" prompt=CONDENSE_QUESTION_PROMPT,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "975c3c2b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"qa = ConversationalRetrievalChain(\n",
|
|
" question_generator=condense_question_chain, \n",
|
|
" retriever=docsearch.as_retriever(),\n",
|
|
" memory=memory, \n",
|
|
" combine_docs_chain=final_qa_chain\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "784aee3a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
|
"result = qa({\"question\": query})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "dfd0ccc1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'question': 'What did the president say about Ketanji Brown Jackson',\n",
|
|
" 'chat_history': [HumanMessage(content='What did the president say about Ketanji Brown Jackson', additional_kwargs={}, example=False),\n",
|
|
" AIMessage(content='{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False)],\n",
|
|
" 'answer': '{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}'}"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "c93f805b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = \"what did he say about her predecessor?\"\n",
|
|
"result = qa({\"question\": query})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "5d8612c0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'question': 'what did he say about her predecessor?',\n",
|
|
" 'chat_history': [HumanMessage(content='What did the president say about Ketanji Brown Jackson', additional_kwargs={}, example=False),\n",
|
|
" AIMessage(content='{\\n \"answer\": \"The President nominated Ketanji Brown Jackson as a Circuit Court of Appeals Judge and praised her as one of the nation\\'s top legal minds who will continue Justice Breyer\\'s legacy of excellence.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False),\n",
|
|
" HumanMessage(content='what did he say about her predecessor?', additional_kwargs={}, example=False),\n",
|
|
" AIMessage(content='{\\n \"answer\": \"The President honored Justice Stephen Breyer for his service as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.\",\\n \"sources\": [\"31-pl\"]\\n}', additional_kwargs={}, example=False)],\n",
|
|
" 'answer': '{\\n \"answer\": \"The President honored Justice Stephen Breyer for his service as an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.\",\\n \"sources\": [\"31-pl\"]\\n}'}"
|
|
]
|
|
},
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ac9e4626",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using your own output schema\n",
|
|
"\n",
|
|
"We can change the outputs of our chain by passing in our own schema. The values and descriptions of this schema will inform the function we pass to the OpenAI API, meaning it won't just affect how we parse outputs but will also change the OpenAI output itself. For example, we can add a `countries_referenced` parameter to our schema and describe what we want this parameter to mean, and that'll cause the OpenAI output to include a list of the countries mentioned in the sources in the response."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"id": "f34a48f8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from typing import List\n",
|
|
"\n",
|
|
"from pydantic import BaseModel, Field\n",
|
|
"\n",
|
|
"from langchain.chains.openai_functions import create_qa_with_structure_chain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"id": "5647c161",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"CustomResponseSchema(answer=\"He announced that American airspace will be closed off to all Russian flights, further isolating Russia and adding economic pressure. The Ruble has lost 30% of its value and the Russian stock market has lost 40% of its value. He also mentioned providing support to Ukraine in terms of military, economic, and humanitarian assistance. The US is giving more than $1 billion in direct assistance to Ukraine. He clarified that US forces are not engaged in conflict with Russian forces in Ukraine but are deployed to defend NATO allies. He emphasized that Putin's actions have consequences and that the free world is holding him accountable through economic sanctions and targeting Russian oligarchs.\", countries_referenced=['Russia', 'Ukraine'], sources=['4-pl', '5-pl', '2-pl', '3-pl'])"
|
|
]
|
|
},
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"class CustomResponseSchema(BaseModel):\n",
|
|
" \"\"\"An answer to the question being asked, with sources.\"\"\"\n",
|
|
"\n",
|
|
" answer: str = Field(..., description=\"Answer to the question that was asked\")\n",
|
|
" countries_referenced: List[str] = Field(..., description=\"All of the countries mentioned in the sources\")\n",
|
|
" sources: List[str] = Field(\n",
|
|
" ..., description=\"List of sources used to answer the question\"\n",
|
|
" )\n",
|
|
"\n",
|
|
"qa_chain_pydantic = create_qa_with_structure_chain(llm, CustomResponseSchema, output_parser=\"pydantic\")\n",
|
|
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
|
" llm_chain=qa_chain_pydantic, \n",
|
|
" document_variable_name='context',\n",
|
|
" document_prompt=doc_prompt,\n",
|
|
")\n",
|
|
"retrieval_qa_pydantic = RetrievalQA(\n",
|
|
" retriever=docsearch.as_retriever(),\n",
|
|
" combine_documents_chain=final_qa_chain_pydantic\n",
|
|
")\n",
|
|
"query = \"What did he say about russia\"\n",
|
|
"retrieval_qa_pydantic.run(query)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b822ef59",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv",
|
|
"language": "python",
|
|
"name": "venv"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|