map rerank chain

harrison/map-rerank
Harrison Chase 1 year ago
parent 7fc4b4b3e1
commit 2cd2d543e4

@@ -59,24 +59,25 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 58,
"id": "d1eaf6e6",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Justice Breyer\"\n",
"query = \"what does he say about officer mora\"\n",
"docs = docsearch.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 59,
"id": "a16e3453",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.question_answering import load_qa_chain\n",
"from langchain.llms import OpenAI"
"from langchain.llms import OpenAI\n",
"from langchain.chains import LLMChain"
]
},
{
@@ -89,6 +90,121 @@
"This sections shows results of using the `stuff` Chain to do question answering."
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "1fe39f14",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"template = \"\"\"Use the following document to answer the given question. In addition to providing an answer, please also give your answer a score from 0-100 in terms of how good it is (higher is better). \n",
"\n",
"What decides the score? A good score is factually accurate, and FULLY answers the question in a way the user would find helpful. If the document does not contain the answer, the score should be 0. You should only give a score of 100 if you are absolutely positive this is the best answer. Keep in mind that you will also be answering this question with other documents, so one of them could have a better answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Document:\n",
"---------------\n",
"Document text here\n",
"---------------\n",
"Question: Question here\n",
"Answer: Answer here\n",
"Score: Score (between 0 and 100) here\n",
"\n",
"Begin!\n",
"\n",
"Document:\n",
"---------------\n",
"{context}\n",
"---------------\n",
"Question: {question}\n",
"Answer:\"\"\"\n",
"from langchain.prompts.base import BaseOutputParser\n",
"import re\n",
"class ScoreOutputParser(BaseOutputParser):\n",
" \n",
" def parse(self, text: str):\n",
" regex = r\"(.*?)\\nScore: (.*)\"\n",
" match = re.search(regex, text)\n",
" if match:\n",
" question = match.group(1)\n",
" answer = match.group(2)\n",
" return {\"answer\": question, \"score\": int(answer)}\n",
" else:\n",
" raise ValueError(f\"Could not parse output: {text}\")\n",
"prompt = PromptTemplate(template=template, input_variables=['context', 'question'], output_parser=ScoreOutputParser())\n",
"\n",
" \n",
"llm_chain = LLMChain(llm = OpenAI(temperature=0), prompt=prompt)"
]
},
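Before wiring this into a chain, it may help to see the parsing contract in isolation. A minimal standalone sketch (the sample completion is made up for illustration) of what the regex in `ScoreOutputParser` extracts from a well-formed completion:

```python
import re

# Hypothetical completion in the "Answer ... Score:" format the prompt requests.
sample = " He says that Officer Mora was 27 years old.\nScore: 100"

match = re.search(r"(.*?)\nScore: (.*)", sample)
assert match is not None
parsed = {"answer": match.group(1), "score": int(match.group(2))}
print(parsed)  # {'answer': ' He says that Officer Mora was 27 years old.', 'score': 100}
```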
{
"cell_type": "code",
"execution_count": 65,
"id": "d341b67e",
"metadata": {},
"outputs": [],
"source": [
"results = llm_chain.apply_and_parse(\n",
" # FYI - this is parallelized and so it is fast.\n",
" [{\"context\": d.page_content, \"question\": query} for d in docs]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "823842ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[({'answer': ' He says that Officer Mora was 27 years old.', 'score': 100},\n",
 Document(page_content='We have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention officer mora in this document.',\n",
" 'score': 0},\n",
 Document(page_content='I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention Officer Mora in the document.', 'score': 0},\n",
 Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention Officer Mora.', 'score': 0},\n",
 Document(page_content='In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. ', lookup_str='', metadata={}, lookup_index=0))]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted(zip(results, docs), key=lambda x: -x[0]['score'])"
]
},
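Sorting by descending score, as in the cell above, is the entire rerank step; the chain's final answer is just the first element of that sorted list. A minimal sketch of pulling out the winner, assuming `results` and `docs` from the cells above are in scope:

```python
# Take the (parsed_output, document) pair with the highest score.
ranked = sorted(zip(results, docs), key=lambda x: -x[0]["score"])
best_output, best_doc = ranked[0]
print(best_output["answer"])  # answer backed by the top-scoring document
print(best_output["score"])   # 100
```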
{
"cell_type": "code",
"execution_count": 56,
"id": "20c51441",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nIve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety. \\n\\nSo lets not abandon our streets. Or choose between safety and equal justice. \\n\\nLets come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThats why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThats why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and cant be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. \\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that cant be sued. \\n\\nThese laws dont infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote and to have it counted. And its under assault. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='We have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLets use this moment to reset. Lets stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLets stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans whod grown up on the same streets they later chose to patrol as police officers. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. ', lookup_str='', metadata={}, lookup_index=0)]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs"
]
},
{
"cell_type": "code",
"execution_count": 6,

@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 23,
"id": "e9db25f3",
"metadata": {},
"outputs": [],
@@ -33,12 +33,12 @@
"llm = OpenAI(temperature=0)\n",
"\n",
"\n",
"text_splitter = CharacterTextSplitter()\n"
"text_splitter = CharacterTextSplitter(chunk_size=1000, separator=\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 24,
"id": "99bbe19b",
"metadata": {},
"outputs": [],
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 25,
"id": "baa6e808",
"metadata": {},
"outputs": [],
@@ -60,17 +60,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 26,
"id": "8dff4f43",
"metadata": {},
"outputs": [],
"source": [
"docs = [Document(page_content=t) for t in texts[:3]]"
"docs = [Document(page_content=t) for t in texts]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "27989fc4",
"metadata": {},
"outputs": [],
@@ -131,33 +131,57 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 36,
"id": "2851dd26",
"metadata": {},
"outputs": [],
"source": [
"class CustomLLM(OpenAI):\n",
" def _generate(\n",
" self, prompts, stop = None\n",
" ):\n",
" for p in prompts:\n",
" if self.get_num_tokens(p) > 600:\n",
" raise ValueError\n",
" return super()._generate(prompts, stop=stop)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ef28e1d4",
"metadata": {},
"outputs": [],
"source": [
"llm = CustomLLM(temperature=0, model_name='text-curie-001')\n",
"chain = load_summarize_chain(llm, chain_type=\"map_reduce\")"
]
},
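The failure below originates in `_split_list_of_docs` in `langchain/chains/combine_documents/map_reduce.py` (visible in the traceback): with `token_max=400`, two intermediate summaries together already exceed the limit, so the reduce step cannot pack them into a group. A simplified, self-contained sketch of that packing check (the real helper uses the chain's token counter as `length_func`; this stub just counts characters):

```python
from typing import Callable, List

def split_list_of_docs(
    docs: List[str], length_func: Callable[[List[str]], int], token_max: int
) -> List[List[str]]:
    """Greedily pack docs into sublists that each fit under token_max."""
    new_result_doc_list = []
    _sub_result_docs: List[str] = []
    for doc in docs:
        _sub_result_docs.append(doc)
        if length_func(_sub_result_docs) > token_max:
            if len(_sub_result_docs) == 1:
                raise ValueError(
                    "A single document was longer than the context length,"
                    " we cannot handle this."
                )
            if len(_sub_result_docs) == 2:
                raise ValueError(
                    "A single document was so long it could not be combined "
                    "with another document, we cannot handle this."
                )
            # Close out the current group and carry the overflow doc forward.
            new_result_doc_list.append(_sub_result_docs[:-1])
            _sub_result_docs = _sub_result_docs[-1:]
    new_result_doc_list.append(_sub_result_docs)
    return new_result_doc_list

# A 300-char doc plus a 200-char doc overflows token_max=400 at group size 2,
# reproducing the error raised in the cell below.
try:
    split_list_of_docs(["x" * 300, "y" * 200], lambda ds: sum(len(d) for d in ds), 400)
except ValueError as e:
    print(e)
```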
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 42,
"id": "f82c5f9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" In response to Vladimir Putin's aggression in Ukraine, the US and its allies have taken action to hold him accountable, including economic sanctions, cutting off access to technology, and seizing the assets of Russian oligarchs. They are also providing military, economic, and humanitarian assistance to the Ukrainians, and releasing 60 million barrels of oil from reserves around the world. President Biden has passed several laws to provide economic relief to Americans and create jobs, and is making sure taxpayer dollars support American jobs and businesses.\""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
"ename": "ValueError",
"evalue": "A single document was so long it could not be combined with another document, we cannot handle this.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[42], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mchain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_documents\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken_max\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m400\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/base.py:141\u001b[0m, in \u001b[0;36mChain.run\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m(args[\u001b[38;5;241m0\u001b[39m])[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m args:\n\u001b[0;32m--> 141\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 144\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`run` supported with either positional arguments or keyword arguments\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m but not both. Got args: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and kwargs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkwargs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 146\u001b[0m )\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/base.py:112\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Entering new \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain...\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[0;32m--> 112\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Finished \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain.\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/base.py:49\u001b[0m, in \u001b[0;36mBaseCombineDocumentsChain._call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;66;03m# Other keys are assumed to be needed for LLM prediction\u001b[39;00m\n\u001b[1;32m 48\u001b[0m other_keys \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m inputs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_key}\n\u001b[0;32m---> 49\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcombine_docs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdocs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mother_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_key: output}\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/map_reduce.py:124\u001b[0m, in \u001b[0;36mMapReduceDocumentsChain.combine_docs\u001b[0;34m(self, docs, token_max, **kwargs)\u001b[0m\n\u001b[1;32m 122\u001b[0m num_tokens \u001b[38;5;241m=\u001b[39m length_func(result_docs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m num_tokens \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m num_tokens \u001b[38;5;241m>\u001b[39m token_max:\n\u001b[0;32m--> 124\u001b[0m new_result_doc_list \u001b[38;5;241m=\u001b[39m \u001b[43m_split_list_of_docs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult_docs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlength_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken_max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m result_docs \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m docs \u001b[38;5;129;01min\u001b[39;00m new_result_doc_list:\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/map_reduce.py:29\u001b[0m, in \u001b[0;36m_split_list_of_docs\u001b[0;34m(docs, length_func, token_max, **kwargs)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA single document was longer than the context length,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m we cannot handle this.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 27\u001b[0m )\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_sub_result_docs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA single document was so long it could not be combined \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith another document, we cannot handle this.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 33\u001b[0m new_result_doc_list\u001b[38;5;241m.\u001b[39mappend(_sub_result_docs[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m])\n\u001b[1;32m 34\u001b[0m _sub_result_docs \u001b[38;5;241m=\u001b[39m _sub_result_docs[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:]\n",
"\u001b[0;31mValueError\u001b[0m: A single document was so long it could not be combined with another document, we cannot handle this."
]
}
],
"source": [
"chain.run(docs)"
"chain.run(input_documents=docs, token_max=400)"
]
},
{

@@ -0,0 +1,70 @@
"""Combining documents by mapping a chain over them first, then reranking results."""
from __future__ import annotations
from typing import Any, Callable, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.docstore.document import Document
class MapRerankDocumentsChain(BaseCombineDocumentsChain, BaseModel):
"""Combining documents by mapping a chain over them, then reranking results."""
llm_chain: LLMChain
"""Chain to apply to each document individually."""
document_variable_name: str
"""The variable name in the llm_chain to put the documents in.
If only one variable in the llm_chain, this need not be provided."""
rank_key: str
"""Key in output of llm_chain to rank on."""
metadata_keys: Optional[List[str]] = None
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
@root_validator(pre=True)
def get_default_document_variable_name(cls, values: Dict) -> Dict:
"""Get default document variable name, if not provided."""
if "document_variable_name" not in values:
llm_chain_variables = values["llm_chain"].prompt.input_variables
if len(llm_chain_variables) == 1:
values["document_variable_name"] = llm_chain_variables[0]
else:
raise ValueError(
"document_variable_name must be provided if there are "
"multiple llm_chain input_variables"
)
else:
llm_chain_variables = values["llm_chain"].prompt.input_variables
if values["document_variable_name"] not in llm_chain_variables:
raise ValueError(
f"document_variable_name {values['document_variable_name']} was "
f"not found in llm_chain input_variables: {llm_chain_variables}"
)
return values
def combine_docs(
self, docs: List[Document], **kwargs: Any
) -> str:
"""Combine documents in a map rerank manner.
Combine by mapping first chain over all documents, then reranking the results.
"""
results = self.llm_chain.apply_and_parse(
# FYI - this is parallelized and so it is fast.
[{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs]
)
sorted_res = sorted(zip(results, docs), key=lambda x: -x[0][self.rank_key])
output, document = sorted_res[0]
if self.metadata_keys is not None:
for key in self.metadata_keys:
output[key] = document.metadata[key]
return output
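
To see the new chain end to end, here is a hedged usage sketch. The module path is assumed from the sibling map_reduce module visible in the traceback above; an OpenAI key and the `prompt` and `docs` objects from the notebook are also assumed:

```python
from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.llms import OpenAI

llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
chain = MapRerankDocumentsChain(
    llm_chain=llm_chain,
    rank_key="score",                  # sort parsed outputs on this key
    document_variable_name="context",  # prompt variable the document text fills
)
# Maps the LLM over every doc in parallel, then returns the top-scoring answer.
best = chain.combine_docs(docs, question="what does he say about officer mora")
```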

@@ -0,0 +1,42 @@
from langchain.prompts import PromptTemplate
from langchain.prompts.base import BaseOutputParser
import re

template = """Use the following document to answer the given question. In addition to providing an answer, please also give your answer a score from 0-100 in terms of how good it is (higher is better).

What decides the score? A good score is factually accurate, and FULLY answers the question in a way the user would find helpful. If the document does not contain the answer, the score should be 0. You should only give a score of 100 if you are absolutely positive this is the best answer. Keep in mind that you will also be answering this question with other documents, so one of them could have a better answer.

Use the following format:

Document:
---------------
Document text here
---------------
Question: Question here
Answer: Answer here
Score: Score (between 0 and 100) here

Begin!

Document:
---------------
{context}
---------------
Question: {question}
Answer:"""


class ScoreOutputParser(BaseOutputParser):
    def parse(self, text: str):
        regex = r"(.*?)\nScore: (.*)"
        match = re.search(regex, text)
        if match:
            answer = match.group(1)
            score = match.group(2)
            return {"answer": answer, "score": int(score)}
        else:
            raise ValueError(f"Could not parse output: {text}")


prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
    output_parser=ScoreOutputParser(),
)
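
For reference, a quick sketch of rendering this prompt and round-tripping a completion through the parser (sample values invented for illustration):

```python
# Render the template with made-up values.
filled = prompt.format(
    context="Officer Mora was 27 years old.",
    question="what does he say about officer mora",
)
print(filled)

# Parse a well-formed completion back into an answer/score dict.
parsed = ScoreOutputParser().parse(" He was 27 years old.\nScore: 95")
assert parsed == {"answer": " He was 27 years old.", "score": 95}
```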