docs: add trim_messages to chatbot (#23061)

4 months ago · ddfbca38df
parent 931b41b30f
commit ddfbca38df
2 changed files with 196 additions and 99 deletions
--- a/docs/docs/how_to/index.mdx
+++ b/docs/docs/how_to/index.mdx
@ -259,6 +259,7 @@ For a high-level tutorial on building chatbots, check out [this guide](/docs/tut
 - [How to: manage memory](/docs/how_to/chatbots_memory)
 - [How to: do retrieval](/docs/how_to/chatbots_retrieval)
 - [How to: use tools](/docs/how_to/chatbots_tools)
+- [How to: manage large chat history](/docs/how_to/trim_messages/)

 ### Query analysis

--- a/docs/docs/tutorials/chatbot.ipynb
+++ b/docs/docs/tutorials/chatbot.ipynb
@ -145,7 +145,7 @@
    {
     "data": {
      "text/plain": [
-       "AIMessage(content='Hello Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 12, 'total_tokens': 22}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-be38de4a-ccef-4a48-bf82-4292510a8cbf-0')"
+       "AIMessage(content='Hello Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 12, 'total_tokens': 22}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-8ecc8a9f-8b32-49ad-8e41-5caa26282f76-0', usage_metadata={'input_tokens': 12, 'output_tokens': 10, 'total_tokens': 22})"
      ]
     },
     "execution_count": 2,
@ -174,7 +174,7 @@
    {
     "data": {
      "text/plain": [
-       "AIMessage(content=\"I'm sorry, as an AI assistant, I do not have the capability to know your name unless you provide it to me.\", response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 12, 'total_tokens': 38}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_caf95bb1ae', 'finish_reason': 'stop', 'logprobs': None}, id='run-8d8a9d8b-dddb-48f1-b0ed-ce80ce5397d8-0')"
+       "AIMessage(content=\"I'm sorry, I don't have access to that information.\", response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 12, 'total_tokens': 25}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-4e0066e8-0dcc-4aea-b4f9-b9029c81724f-0', usage_metadata={'input_tokens': 12, 'output_tokens': 13, 'total_tokens': 25})"
      ]
     },
     "execution_count": 3,
@ -206,7 +206,7 @@
    {
     "data": {
      "text/plain": [
-       "AIMessage(content='Your name is Bob.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 35, 'total_tokens': 40}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-5692718a-5d29-4f84-bad1-a9819a6118f1-0')"
+       "AIMessage(content='Your name is Bob. How can I assist you today, Bob?', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 35, 'total_tokens': 49}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c377d868-1bfe-491a-82fb-1f9122939796-0', usage_metadata={'input_tokens': 35, 'output_tokens': 14, 'total_tokens': 49})"
      ]
     },
     "execution_count": 4,
@ -268,7 +268,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@ -297,7 +297,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -306,16 +306,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 9bdaa45d-604e-4891-9b0a-28754985f10b not found for run 271bd46a-f980-407a-af8a-9399420bce8d. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
       "'Hello Bob! How can I assist you today?'"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -331,16 +338,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 16482292-535c-449d-8a9d-d0fccf5112eb not found for run 7f2e501a-d5b4-4d8c-924b-aae9eb9d7267. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "'Your name is Bob.'"
+       "'Your name is Bob. How can I assist you today, Bob?'"
      ]
     },
-     "execution_count": 16,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -363,16 +377,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run c14d7130-04c5-445f-9e22-442f7c7e8f07 not found for run 946beadc-5cf1-468f-bac4-ca5ddc10ea73. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "\"I'm sorry, I do not have the ability to know your name unless you tell me.\""
+       "\"I'm sorry, I don't know your name as you have not provided it.\""
      ]
     },
-     "execution_count": 17,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -397,16 +418,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 4f61611c-3875-4b2d-9f89-af452866d55a not found for run 066a30b1-bbb0-4fee-a035-7fdb41c28d91. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "'Your name is Bob.'"
+       "'Your name is Bob. How can I assist you today, Bob?'"
      ]
     },
-     "execution_count": 18,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -444,7 +472,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@ -472,16 +500,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "'Hello, Bob! How can I assist you today?'"
+       "'Hello Bob! How can I assist you today?'"
      ]
     },
-     "execution_count": 21,
+     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -501,7 +529,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
@ -510,7 +538,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
@ -519,16 +547,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 51e624b3-19fd-435f-b580-2a3e4f2d0dc9 not found for run b411f007-b2ad-48c3-968c-aa5ecbb58aea. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "'Hello, Jim! How can I assist you today?'"
+       "'Hello Jim! How can I assist you today?'"
      ]
     },
-     "execution_count": 24,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -544,16 +579,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run a30b22cd-698f-48a1-94a0-1a172242e292 not found for run 52b0b60d-5d2a-4610-a572-037602792ad6. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "'Your name is Jim. How can I assist you further, Jim?'"
+       "'Your name is Jim.'"
      ]
     },
-     "execution_count": 25,
+     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -576,7 +618,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@ -602,16 +644,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "'¡Hola Bob! ¿En qué puedo ayudarte hoy?'"
+       "'¡Hola, Bob! ¿En qué puedo ayudarte hoy?'"
      ]
     },
-     "execution_count": 28,
+     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -633,7 +675,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@ -646,7 +688,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
@ -655,16 +697,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run d02b7778-4a91-4831-ace9-b33bb456dc90 not found for run ee0a20dd-5b9e-4862-b3c9-8e2e72b8eb82. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
       "'¡Hola Todd! ¿En qué puedo ayudarte hoy?'"
      ]
     },
-     "execution_count": 34,
+     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -680,16 +729,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 12422d4c-6494-490e-845e-08dcc1c6a4b9 not found for run a82eb759-f51d-4488-871b-6e2d601b4128. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "'Tu nombre es Todd. ¿Hay algo más en lo que pueda ayudarte?'"
+       "'Tu nombre es Todd.'"
      ]
     },
-     "execution_count": 35,
+     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -720,43 +776,47 @@
    "\n",
    "**Importantly, you will want to do this BEFORE the prompt template but AFTER you load previous messages from Message History.**\n",
    "\n",
-    "We can do this by adding a simple step in front of the prompt that modifies the `messages` key appropriately, and then wrap that new chain in the Message History class. First, let's define a function that will modify the messages passed in. Let's make it so that it selects the `k` most recent messages. We can then create a new chain by adding that at the start."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain_core.runnables import RunnablePassthrough\n",
-    "\n",
-    "\n",
-    "def filter_messages(messages, k=10):\n",
-    "    return messages[-k:]\n",
+    "We can do this by adding a simple step in front of the prompt that modifies the `messages` key appropriately, and then wrap that new chain in the Message History class. \n",
    "\n",
-    "\n",
-    "chain = (\n",
-    "    RunnablePassthrough.assign(messages=lambda x: filter_messages(x[\"messages\"]))\n",
-    "    | prompt\n",
-    "    | model\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's now try it out! If we create a list of messages more than 10 messages long, we can see what it no longer remembers information in the early messages."
+    "LangChain comes with a few built-in helpers for [managing a list of messages](/docs/how_to/#messages). In this case we'll use the [trim_messages](/docs/how_to/trim_messages/) helper to reduce how many messages we're sending to the model. The trimmer allows us to specify how many tokens we want to keep, along with other parameters such as whether to always keep the system message and whether to allow partial messages:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 34,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[SystemMessage(content=\"you're a good assistant\"),\n",
+       " HumanMessage(content='whats 2 + 2'),\n",
+       " AIMessage(content='4'),\n",
+       " HumanMessage(content='thanks'),\n",
+       " AIMessage(content='no problem!'),\n",
+       " HumanMessage(content='having fun?'),\n",
+       " AIMessage(content='yes!')]"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
+    "from langchain_core.messages import SystemMessage, trim_messages\n",
+    "\n",
+    "trimmer = trim_messages(\n",
+    "    max_tokens=65,\n",
+    "    strategy=\"last\",\n",
+    "    token_counter=model,\n",
+    "    include_system=True,\n",
+    "    allow_partial=False,\n",
+    "    start_on=\"human\",\n",
+    ")\n",
+    "\n",
    "messages = [\n",
+    "    SystemMessage(content=\"you're a good assistant\"),\n",
    "    HumanMessage(content=\"hi! I'm bob\"),\n",
    "    AIMessage(content=\"hi!\"),\n",
    "    HumanMessage(content=\"I like vanilla ice cream\"),\n",
@ -767,26 +827,47 @@
    "    AIMessage(content=\"no problem!\"),\n",
    "    HumanMessage(content=\"having fun?\"),\n",
    "    AIMessage(content=\"yes!\"),\n",
-    "]"
+    "]\n",
+    "\n",
+    "trimmer.invoke(messages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To use it in our chain, we just need to run the trimmer before we pass the `messages` input to our prompt. \n",
+    "\n",
+    "Now if we try asking the model our name, it won't know it since we trimmed that part of the chat history:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "\"I'm sorry, I don’t have access to your name. Can I help you with anything else?\""
+       "\"I'm sorry, I don't have access to personal information. How can I assist you today?\""
      ]
     },
-     "execution_count": 47,
+     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "from operator import itemgetter\n",
+    "\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "\n",
+    "chain = (\n",
+    "    RunnablePassthrough.assign(messages=itemgetter(\"messages\") | trimmer)\n",
+    "    | prompt\n",
+    "    | model\n",
+    ")\n",
+    "\n",
    "response = chain.invoke(\n",
    "    {\n",
    "        \"messages\": messages + [HumanMessage(content=\"what's my name?\")],\n",
@ -800,21 +881,21 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "But if we ask about information that is within the last ten messages, it still remembers it"
+    "But if we ask about information that is within the last few messages, it remembers:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "'You mentioned that you like vanilla ice cream.'"
+       "'You asked \"what\\'s 2 + 2?\"'"
      ]
     },
-     "execution_count": 48,
+     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -822,7 +903,7 @@
   "source": [
    "response = chain.invoke(\n",
    "    {\n",
-    "        \"messages\": messages + [HumanMessage(content=\"what's my fav ice cream\")],\n",
+    "        \"messages\": messages + [HumanMessage(content=\"what math problem did i ask\")],\n",
    "        \"language\": \"English\",\n",
    "    }\n",
    ")\n",
@ -838,7 +919,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
@ -853,16 +934,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 38,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run e1bb2af3-192b-4bd1-8734-6d2dff1d80b6 not found for run 0c734998-cf16-4708-8658-043a6c7b4a91. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "\"I'm sorry, I don't know your name.\""
+       "\"I'm sorry, I don't have access to your name. How can I assist you today?\""
      ]
     },
-     "execution_count": 57,
+     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -883,21 +971,28 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "There's now two new messages in the chat history. This means that even more information that used to be accessible in our conversation history is no longer available!"
+    "As expected, the first message where we stated our name has been trimmed. Plus, there are now two new messages in the chat history (our latest question and the latest response). This means that even more information that used to be accessible in our conversation history is no longer available! In this case, our initial math question has been trimmed from the history as well, so the model no longer knows about it:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 39,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run 181a1f04-9176-4837-80e8-ce74866775a2 not found for run ad402c5a-8341-4c62-ac58-cdf923b3b9ec. Treating as a root run.\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "\"I'm sorry, I don't know your favorite ice cream flavor.\""
+       "\"You haven't asked a math problem yet. Feel free to ask any math question you have, and I'll do my best to help you with it.\""
      ]
     },
-     "execution_count": 58,
+     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -905,7 +1000,7 @@
   "source": [
    "response = with_message_history.invoke(\n",
    "    {\n",
-    "        \"messages\": [HumanMessage(content=\"whats my favorite ice cream?\")],\n",
+    "        \"messages\": [HumanMessage(content=\"what math problem did i ask?\")],\n",
    "        \"language\": \"English\",\n",
    "    },\n",
    "    config=config,\n",
@ -918,7 +1013,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "If you take a look at LangSmith, you can see exactly what is happening under the hood in the [LangSmith trace](https://smith.langchain.com/public/fa6b00da-bcd8-4c1c-a799-6b32a3d62964/r)"
+    "If you take a look at LangSmith, you can see exactly what is happening under the hood in the [LangSmith trace](https://smith.langchain.com/public/a64b8b7c-1fd6-4dbb-b11a-47cd09a5e4f1/r)."
   ]
  },
  {
@ -936,9 +1031,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 31,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parent run e0ee52b6-1261-4f2d-98ca-f78c9019684b not found for run 0f6d7995-c32c-4bdb-b7a6-b3d932c13389. Treating as a root run.\n"
+     ]
+    },
    {
     "name": "stdout",
     "output_type": "stream",
@ -977,22 +1079,16 @@
    "If you want to dive deeper on specifics, some things worth checking out are:\n",
    "\n",
    "- [Streaming](/docs/how_to/streaming): streaming is *crucial* for chat applications\n",
-    "- [How to add message history](/docs/how_to/message_history): for a deeper dive into all things related to message history"
+    "- [How to add message history](/docs/how_to/message_history): for a deeper dive into all things related to message history\n",
+    "- [How to manage large message history](/docs/how_to/trim_messages/): more techniques for managing a large chat history"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "poetry-venv-2",
   "language": "python",
-   "name": "python3"
+   "name": "poetry-venv-2"
  },
  "language_info": {
   "codemirror_mode": {
@ -1004,7 +1100,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.1"
+   "version": "3.11.9"
  }
 },
 "nbformat": 4,