docs: use trim_messages in chatbot how to (#23139)

pull/23146/head
Bagatur 4 months ago committed by GitHub
parent b483bf5095
commit cf38981bb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -71,13 +71,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")"
"chat = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")"
]
},
{
@ -95,19 +95,15 @@
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='I said \"J\\'adore la programmation,\" which means \"I love programming\" in French.')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"I said \"J'adore la programmation,\" which means \"I love programming\" in French.\n"
]
}
],
"source": [
"from langchain_core.messages import AIMessage, HumanMessage\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
@ -115,23 +111,25 @@
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n",
" ),\n",
" MessagesPlaceholder(variable_name=\"messages\"),\n",
" (\"placeholder\", \"{messages}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | chat\n",
"\n",
"chain.invoke(\n",
"ai_msg = chain.invoke(\n",
" {\n",
" \"messages\": [\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French: I love programming.\"\n",
" (\n",
" \"human\",\n",
" \"Translate this sentence from English to French: I love programming.\",\n",
" ),\n",
" AIMessage(content=\"J'adore la programmation.\"),\n",
" HumanMessage(content=\"What did you just say?\"),\n",
" (\"ai\", \"J'adore la programmation.\"),\n",
" (\"human\", \"What did you just say?\"),\n",
" ],\n",
" }\n",
")"
")\n",
"print(ai_msg.content)"
]
},
{
@ -193,7 +191,7 @@
{
"data": {
"text/plain": [
"AIMessage(content='You asked me to translate the sentence \"I love programming\" from English to French.')"
"AIMessage(content='You just asked me to translate the sentence \"I love programming\" from English to French.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 61, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5cbb21c2-9c30-4031-8ea8-bfc497989535-0', usage_metadata={'input_tokens': 61, 'output_tokens': 18, 'total_tokens': 79})"
]
},
"execution_count": 5,
@ -250,7 +248,7 @@
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n",
" ),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
@ -304,10 +302,17 @@
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run dc4e2f79-4bcd-4a36-9506-55ace9040588 not found for run 34b5773e-3ced-46a6-8daf-4d464c15c940. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='The translation of \"I love programming\" in French is \"J\\'adore la programmation.\"')"
"AIMessage(content='\"J\\'adore la programmation.\"', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 39, 'total_tokens': 48}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-648b0822-b0bb-47a2-8e7d-7d34744be8f2-0', usage_metadata={'input_tokens': 39, 'output_tokens': 9, 'total_tokens': 48})"
]
},
"execution_count": 8,
@ -327,10 +332,17 @@
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run cc14b9d8-c59e-40db-a523-d6ab3fc2fa4f not found for run 5b75e25c-131e-46ee-9982-68569db04330. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='You just asked me to translate the sentence \"I love programming\" from English to French.')"
"AIMessage(content='You asked me to translate the sentence \"I love programming\" from English to French.', response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 63, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5950435c-1dc2-43a6-836f-f989fd62c95e-0', usage_metadata={'input_tokens': 63, 'output_tokens': 17, 'total_tokens': 80})"
]
},
"execution_count": 9,
@ -354,12 +366,12 @@
"\n",
"### Trimming messages\n",
"\n",
"LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is to only load and store the most recent `n` messages. Let's use an example history with some preloaded messages:"
"LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is trim the historic messages before passing them to the model. Let's use an example history with some preloaded messages:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 21,
"metadata": {},
"outputs": [
{
@ -371,7 +383,7 @@
" AIMessage(content='Fine thanks!')]"
]
},
"execution_count": 10,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@ -396,34 +408,28 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run 7ff2d8ec-65e2-4f67-8961-e498e2c4a591 not found for run 3881e990-6596-4326-84f6-2b76949e0657. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='Your name is Nemo.')"
"AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f8aabef8-631a-4238-a39b-701e881fbe47-0', usage_metadata={'input_tokens': 66, 'output_tokens': 6, 'total_tokens': 72})"
]
},
"execution_count": 11,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n",
" ),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | chat\n",
"\n",
"chain_with_message_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: demo_ephemeral_chat_history,\n",
@ -443,34 +449,33 @@
"source": [
"We can see the chain remembers the preloaded name.\n",
"\n",
"But let's say we have a very small context window, and we want to trim the number of messages passed to the chain to only the 2 most recent ones. We can use the `clear` method to remove messages and re-add them to the history. We don't have to, but let's put this method at the front of our chain to ensure it's always called:"
"But let's say we have a very small context window, and we want to trim the number of messages passed to the chain to only the 2 most recent ones. We can use the built in [trim_messages](/docs/how_to/trim_messages/) util to trim messages based on their token count before they reach our prompt. In this case we'll count each message as 1 \"token\" and keep only the last two messages:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"\n",
"def trim_messages(chain_input):\n",
" stored_messages = demo_ephemeral_chat_history.messages\n",
" if len(stored_messages) <= 2:\n",
" return False\n",
"from operator import itemgetter\n",
"\n",
" demo_ephemeral_chat_history.clear()\n",
"\n",
" for message in stored_messages[-2:]:\n",
" demo_ephemeral_chat_history.add_message(message)\n",
"\n",
" return True\n",
"from langchain_core.messages import trim_messages\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"trimmer = trim_messages(strategy=\"last\", max_tokens=2, token_counter=len)\n",
"\n",
"chain_with_trimming = (\n",
" RunnablePassthrough.assign(messages_trimmed=trim_messages)\n",
" | chain_with_message_history\n",
" RunnablePassthrough.assign(chat_history=itemgetter(\"chat_history\") | trimmer)\n",
" | prompt\n",
" | chat\n",
")\n",
"\n",
"chain_with_trimmed_history = RunnableWithMessageHistory(\n",
" chain_with_trimming,\n",
" lambda session_id: demo_ephemeral_chat_history,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
")"
]
},
@ -483,22 +488,29 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run 775cde65-8d22-4c44-80bb-f0b9811c32ca not found for run 5cf71d0e-4663-41cd-8dbe-e9752689cfac. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"P. Sherman's address is 42 Wallaby Way, Sydney.\")"
"AIMessage(content='P. Sherman is a fictional character from the animated movie \"Finding Nemo\" who lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5642ef3a-fdbe-43cf-a575-d1785976a1b9-0', usage_metadata={'input_tokens': 53, 'output_tokens': 27, 'total_tokens': 80})"
]
},
"execution_count": 13,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_trimming.invoke(\n",
"chain_with_trimmed_history.invoke(\n",
" {\"input\": \"Where does P. Sherman live?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
")"
@ -506,19 +518,23 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content=\"What's my name?\"),\n",
" AIMessage(content='Your name is Nemo.'),\n",
"[HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content='Hello!'),\n",
" HumanMessage(content='How are you today?'),\n",
" AIMessage(content='Fine thanks!'),\n",
" HumanMessage(content=\"What's my name?\"),\n",
" AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f8aabef8-631a-4238-a39b-701e881fbe47-0', usage_metadata={'input_tokens': 66, 'output_tokens': 6, 'total_tokens': 72}),\n",
" HumanMessage(content='Where does P. Sherman live?'),\n",
" AIMessage(content=\"P. Sherman's address is 42 Wallaby Way, Sydney.\")]"
" AIMessage(content='P. Sherman is a fictional character from the animated movie \"Finding Nemo\" who lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5642ef3a-fdbe-43cf-a575-d1785976a1b9-0', usage_metadata={'input_tokens': 53, 'output_tokens': 27, 'total_tokens': 80})]"
]
},
"execution_count": 14,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@ -536,48 +552,39 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run fde7123f-6fd3-421a-a3fc-2fb37dead119 not found for run 061a4563-2394-470d-a3ed-9bf1388ca431. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"I'm sorry, I don't have access to your personal information.\")"
"AIMessage(content=\"I'm sorry, but I don't have access to your personal information, so I don't know your name. How else may I assist you today?\", response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 74, 'total_tokens': 105}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0ab03495-1f7c-4151-9070-56d2d1c565ff-0', usage_metadata={'input_tokens': 74, 'output_tokens': 31, 'total_tokens': 105})"
]
},
"execution_count": 15,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_trimming.invoke(\n",
"chain_with_trimmed_history.invoke(\n",
" {\"input\": \"What is my name?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"cell_type": "markdown",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content='Where does P. Sherman live?'),\n",
" AIMessage(content=\"P. Sherman's address is 42 Wallaby Way, Sydney.\"),\n",
" HumanMessage(content='What is my name?'),\n",
" AIMessage(content=\"I'm sorry, I don't have access to your personal information.\")]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"demo_ephemeral_chat_history.messages"
"Check out our [how to guide on trimming messages](/docs/how_to/trim_messages/) for more."
]
},
{
@ -638,7 +645,7 @@
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability. The provided chat history includes facts about the user you are speaking with.\",\n",
" ),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\"user\", \"{input}\"),\n",
" ]\n",
")\n",
@ -672,7 +679,7 @@
" return False\n",
" summarization_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\n",
" \"user\",\n",
" \"Distill the above chat messages into a single summary message. Include as many specific details as you can.\",\n",
@ -772,9 +779,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

Loading…
Cancel
Save