map rerank chain

harrison/map-rerank
Harrison Chase 1 year ago
parent 7fc4b4b3e1
commit 2cd2d543e4

@@ -59,24 +59,25 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 58,
"id": "d1eaf6e6",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Justice Breyer\"\n",
"query = \"what does he say about officer mora\"\n",
"docs = docsearch.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 59,
"id": "a16e3453",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.question_answering import load_qa_chain\n",
"from langchain.llms import OpenAI"
"from langchain.llms import OpenAI\n",
"from langchain.chains import LLMChain"
]
},
{
@@ -89,6 +90,121 @@
"This sections shows results of using the `stuff` Chain to do question answering."
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "1fe39f14",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"template = \"\"\"Use the following document to answer the given question. In addition to providing an answer, please also give your answer a score from 0-100 in terms of how good it is (higher is better). \n",
"\n",
"What decides the score? A good score is factually accurate, and FULLY answers the question in a way the user would find helpful. If the document does not contain the answer, the score should be 0. You should only give a score of 100 if you are absolutely positive this is the best answer. Keep in mind that you will also be answering this question with other documents, so one of them could have a better answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Document:\n",
"---------------\n",
"Document text here\n",
"---------------\n",
"Question: Question here\n",
"Answer: Answer here\n",
"Score: Score (between 0 and 100) here\n",
"\n",
"Begin!\n",
"\n",
"Document:\n",
"---------------\n",
"{context}\n",
"---------------\n",
"Question: {question}\n",
"Answer:\"\"\"\n",
"from langchain.prompts.base import BaseOutputParser\n",
"import re\n",
"class ScoreOutputParser(BaseOutputParser):\n",
" \n",
" def parse(self, text: str):\n",
" regex = r\"(.*?)\\nScore: (.*)\"\n",
" match = re.search(regex, text)\n",
" if match:\n",
" question = match.group(1)\n",
" answer = match.group(2)\n",
" return {\"answer\": question, \"score\": int(answer)}\n",
" else:\n",
" raise ValueError(f\"Could not parse output: {text}\")\n",
"prompt = PromptTemplate(template=template, input_variables=['context', 'question'], output_parser=ScoreOutputParser())\n",
"\n",
" \n",
"llm_chain = LLMChain(llm = OpenAI(temperature=0), prompt=prompt)"
]
},
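Before wiring this into a chain, it may help to see the parsing contract in isolation. A minimal standalone sketch (the sample completion is made up for illustration) of what the regex in `ScoreOutputParser` extracts from a well-formed completion:

```python
import re

# Hypothetical completion in the "Answer ... Score:" format the prompt requests.
sample = " He says that Officer Mora was 27 years old.\nScore: 100"

match = re.search(r"(.*?)\nScore: (.*)", sample)
assert match is not None
parsed = {"answer": match.group(1), "score": int(match.group(2))}
print(parsed)  # {'answer': ' He says that Officer Mora was 27 years old.', 'score': 100}
```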
{
"cell_type": "code",
"execution_count": 65,
"id": "d341b67e",
"metadata": {},
"outputs": [],
"source": [
"results = llm_chain.apply_and_parse(\n",
" # FYI - this is parallelized and so it is fast.\n",
" [{\"context\": d.page_content, \"question\": query} for d in docs]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "823842ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[({'answer': ' He says that Officer Mora was 27 years old.', 'score': 100},\n",
 Document(page_content='We have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention officer mora in this document.',\n",
" 'score': 0},\n",
 Document(page_content='I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention Officer Mora in the document.', 'score': 0},\n",
 Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. ', lookup_str='', metadata={}, lookup_index=0)),\n",
" ({'answer': ' He does not mention Officer Mora.', 'score': 0},\n",
 Document(page_content='In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. ', lookup_str='', metadata={}, lookup_index=0))]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted(zip(results, docs), key=lambda x: -x[0]['score'])"
]
},
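Sorting by descending score, as in the cell above, is the entire rerank step; the chain's final answer is just the first element of that sorted list. A minimal sketch of pulling out the winner, assuming `results` and `docs` from the cells above are in scope:

```python
# Take the (parsed_output, document) pair with the highest score.
ranked = sorted(zip(results, docs), key=lambda x: -x[0]["score"])
best_output, best_doc = ranked[0]
print(best_output["answer"])  # answer backed by the top-scoring document
print(best_output["score"])   # 100
```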
{
"cell_type": "code",
"execution_count": 56,
"id": "20c51441",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nIve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety. \\n\\nSo lets not abandon our streets. Or choose between safety and equal justice. \\n\\nLets come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThats why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThats why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and cant be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. \\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that cant be sued. \\n\\nThese laws dont infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote and to have it counted. And its under assault. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='We have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLets use this moment to reset. Lets stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLets stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans whod grown up on the same streets they later chose to patrol as police officers. ', lookup_str='', metadata={}, lookup_index=0),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. ', lookup_str='', metadata={}, lookup_index=0)]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs"
]
},
{
"cell_type": "code",
"execution_count": 6,

@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 23,
"id": "e9db25f3",
"metadata": {},
"outputs": [],
@@ -33,12 +33,12 @@
"llm = OpenAI(temperature=0)\n",
"\n",
"\n",
"text_splitter = CharacterTextSplitter()\n"
"text_splitter = CharacterTextSplitter(chunk_size=1000, separator=\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 24,
"id": "99bbe19b",
"metadata": {},
"outputs": [],
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 25,
"id": "baa6e808",
"metadata": {},
"outputs": [],
@@ -60,17 +60,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 26,
"id": "8dff4f43",
"metadata": {},
"outputs": [],
"source": [
"docs = [Document(page_content=t) for t in texts[:3]]"
"docs = [Document(page_content=t) for t in texts]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "27989fc4",
"metadata": {},
"outputs": [],
@@ -131,33 +131,57 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 36,
"id": "2851dd26",
"metadata": {},
"outputs": [],
"source": [
"class CustomLLM(OpenAI):\n",
" def _generate(\n",
" self, prompts, stop = None\n",
" ):\n",
" for p in prompts:\n",
" if self.get_num_tokens(p) > 600:\n",
" raise ValueError\n",
" return super()._generate(prompts, stop=stop)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ef28e1d4",
"metadata": {},
"outputs": [],
"source": [
"llm = CustomLLM(temperature=0, model_name='text-curie-001')\n",
"chain = load_summarize_chain(llm, chain_type=\"map_reduce\")"
]
},
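The failure below originates in `_split_list_of_docs` in `langchain/chains/combine_documents/map_reduce.py` (visible in the traceback): with `token_max=400`, two intermediate summaries together already exceed the limit, so the reduce step cannot pack them into a group. A simplified, self-contained sketch of that packing check (the real helper uses the chain's token counter as `length_func`; this stub just counts characters):

```python
from typing import Callable, List

def split_list_of_docs(
    docs: List[str], length_func: Callable[[List[str]], int], token_max: int
) -> List[List[str]]:
    """Greedily pack docs into sublists that each fit under token_max."""
    new_result_doc_list = []
    _sub_result_docs: List[str] = []
    for doc in docs:
        _sub_result_docs.append(doc)
        if length_func(_sub_result_docs) > token_max:
            if len(_sub_result_docs) == 1:
                raise ValueError(
                    "A single document was longer than the context length,"
                    " we cannot handle this."
                )
            if len(_sub_result_docs) == 2:
                raise ValueError(
                    "A single document was so long it could not be combined "
                    "with another document, we cannot handle this."
                )
            # Close out the current group and carry the overflow doc forward.
            new_result_doc_list.append(_sub_result_docs[:-1])
            _sub_result_docs = _sub_result_docs[-1:]
    new_result_doc_list.append(_sub_result_docs)
    return new_result_doc_list

# A 300-char doc plus a 200-char doc overflows token_max=400 at group size 2,
# reproducing the error raised in the cell below.
try:
    split_list_of_docs(["x" * 300, "y" * 200], lambda ds: sum(len(d) for d in ds), 400)
except ValueError as e:
    print(e)
```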
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 42,
"id": "f82c5f9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" In response to Vladimir Putin's aggression in Ukraine, the US and its allies have taken action to hold him accountable, including economic sanctions, cutting off access to technology, and seizing the assets of Russian oligarchs. They are also providing military, economic, and humanitarian assistance to the Ukrainians, and releasing 60 million barrels of oil from reserves around the world. President Biden has passed several laws to provide economic relief to Americans and create jobs, and is making sure taxpayer dollars support American jobs and businesses.\""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
"ename": "ValueError",
"evalue": "A single document was so long it could not be combined with another document, we cannot handle this.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[42], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mchain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_documents\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken_max\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m400\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/base.py:141\u001b[0m, in \u001b[0;36mChain.run\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m(args[\u001b[38;5;241m0\u001b[39m])[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m args:\n\u001b[0;32m--> 141\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 144\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`run` supported with either positional arguments or keyword arguments\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m but not both. Got args: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and kwargs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkwargs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 146\u001b[0m )\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/base.py:112\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Entering new \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain...\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[0;32m--> 112\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Finished \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain.\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/base.py:49\u001b[0m, in \u001b[0;36mBaseCombineDocumentsChain._call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;66;03m# Other keys are assumed to be needed for LLM prediction\u001b[39;00m\n\u001b[1;32m 48\u001b[0m other_keys \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m inputs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_key}\n\u001b[0;32m---> 49\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcombine_docs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdocs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mother_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_key: output}\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/map_reduce.py:124\u001b[0m, in \u001b[0;36mMapReduceDocumentsChain.combine_docs\u001b[0;34m(self, docs, token_max, **kwargs)\u001b[0m\n\u001b[1;32m 122\u001b[0m num_tokens \u001b[38;5;241m=\u001b[39m length_func(result_docs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m num_tokens \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m num_tokens \u001b[38;5;241m>\u001b[39m token_max:\n\u001b[0;32m--> 124\u001b[0m new_result_doc_list \u001b[38;5;241m=\u001b[39m \u001b[43m_split_list_of_docs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult_docs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlength_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken_max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m result_docs \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m docs \u001b[38;5;129;01min\u001b[39;00m new_result_doc_list:\n",
"File \u001b[0;32m~/workplace/langchain/langchain/chains/combine_documents/map_reduce.py:29\u001b[0m, in \u001b[0;36m_split_list_of_docs\u001b[0;34m(docs, length_func, token_max, **kwargs)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA single document was longer than the context length,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m we cannot handle this.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 27\u001b[0m )\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_sub_result_docs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA single document was so long it could not be combined \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith another document, we cannot handle this.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 33\u001b[0m new_result_doc_list\u001b[38;5;241m.\u001b[39mappend(_sub_result_docs[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m])\n\u001b[1;32m 34\u001b[0m _sub_result_docs \u001b[38;5;241m=\u001b[39m _sub_result_docs[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:]\n",
"\u001b[0;31mValueError\u001b[0m: A single document was so long it could not be combined with another document, we cannot handle this."
]
}
],
"source": [
"chain.run(docs)"
"chain.run(input_documents=docs, token_max=400)"
]
},
{

@@ -0,0 +1,70 @@
"""Combining documents by mapping a chain over them first, then reranking results."""
from __future__ import annotations
from typing import Any, Callable, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.docstore.document import Document
class MapRerankDocumentsChain(BaseCombineDocumentsChain, BaseModel):
"""Combining documents by mapping a chain over them, then reranking results."""
llm_chain: LLMChain
"""Chain to apply to each document individually."""
document_variable_name: str
"""The variable name in the llm_chain to put the documents in.
If only one variable in the llm_chain, this need not be provided."""
rank_key: str
"""Key in output of llm_chain to rank on."""
metadata_keys: Optional[List[str]] = None
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
@root_validator(pre=True)
def get_default_document_variable_name(cls, values: Dict) -> Dict:
"""Get default document variable name, if not provided."""
if "document_variable_name" not in values:
llm_chain_variables = values["llm_chain"].prompt.input_variables
if len(llm_chain_variables) == 1:
values["document_variable_name"] = llm_chain_variables[0]
else:
raise ValueError(
"document_variable_name must be provided if there are "
"multiple llm_chain input_variables"
)
else:
llm_chain_variables = values["llm_chain"].prompt.input_variables
if values["document_variable_name"] not in llm_chain_variables:
raise ValueError(
f"document_variable_name {values['document_variable_name']} was "
f"not found in llm_chain input_variables: {llm_chain_variables}"
)
return values
def combine_docs(
self, docs: List[Document], **kwargs: Any
) -> str:
"""Combine documents in a map rerank manner.
Combine by mapping first chain over all documents, then reranking the results.
"""
results = self.llm_chain.apply_and_parse(
# FYI - this is parallelized and so it is fast.
[{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs]
)
sorted_res = sorted(zip(results, docs), key=lambda x: -x[0][self.rank_key])
output, document = sorted_res[0]
if self.metadata_keys is not None:
for key in self.metadata_keys:
output[key] = document.metadata[key]
return output
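
To see the new chain end to end, here is a hedged usage sketch. The module path is assumed from the sibling map_reduce module visible in the traceback above; an OpenAI key and the `prompt` and `docs` objects from the notebook are also assumed:

```python
from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.llms import OpenAI

llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
chain = MapRerankDocumentsChain(
    llm_chain=llm_chain,
    rank_key="score",                  # sort parsed outputs on this key
    document_variable_name="context",  # prompt variable the document text fills
)
# Maps the LLM over every doc in parallel, then returns the top-scoring answer.
best = chain.combine_docs(docs, question="what does he say about officer mora")
```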

@@ -0,0 +1,42 @@
from langchain.prompts import PromptTemplate
from langchain.prompts.base import BaseOutputParser
import re

template = """Use the following document to answer the given question. In addition to providing an answer, please also give your answer a score from 0-100 in terms of how good it is (higher is better).

What decides the score? A good score is factually accurate, and FULLY answers the question in a way the user would find helpful. If the document does not contain the answer, the score should be 0. You should only give a score of 100 if you are absolutely positive this is the best answer. Keep in mind that you will also be answering this question with other documents, so one of them could have a better answer.

Use the following format:

Document:
---------------
Document text here
---------------
Question: Question here
Answer: Answer here
Score: Score (between 0 and 100) here

Begin!

Document:
---------------
{context}
---------------
Question: {question}
Answer:"""


class ScoreOutputParser(BaseOutputParser):
    def parse(self, text: str):
        regex = r"(.*?)\nScore: (.*)"
        match = re.search(regex, text)
        if match:
            answer = match.group(1)
            score = match.group(2)
            return {"answer": answer, "score": int(score)}
        else:
            raise ValueError(f"Could not parse output: {text}")


prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
    output_parser=ScoreOutputParser(),
)
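
For reference, a quick sketch of rendering this prompt and round-tripping a completion through the parser (sample values invented for illustration):

```python
# Render the template with made-up values.
filled = prompt.format(
    context="Officer Mora was 27 years old.",
    question="what does he say about officer mora",
)
print(filled)

# Parse a well-formed completion back into an answer/score dict.
parsed = ScoreOutputParser().parse(" He was 27 years old.\nScore: 95")
assert parsed == {"answer": " He was 27 years old.", "score": 95}
```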