updates token counting function to work with gpt-4

pull/254/head
Ted Sanders 1 year ago
parent 0e3ec04447
commit a101157740

@@ -66,9 +66,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: tiktoken in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (0.3.2)\n",
"Requirement already satisfied: regex>=2022.1.18 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2022.10.31)\n",
"Requirement already satisfied: requests>=2.26.0 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2.28.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2.0.9)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2021.10.8)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (1.26.7)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade tiktoken"
]
@@ -416,11 +431,11 @@
"source": [
"## 6. Counting tokens for chat API calls\n",
"\n",
"ChatGPT models like `gpt-3.5-turbo` use tokens in the same way as other models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
"ChatGPT models like `gpt-3.5-turbo` use tokens in the same way as past completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
"\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo-0301`.\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo-0301` or `gpt-4-0314`.\n",
"\n",
"The exact way that messages are converted into tokens may change from model to model. So when future model versions are released, the answers returned by this function may be only approximate. The [ChatML documentation](https://github.com/openai/openai-python/blob/main/chatml.md) explains how messages are converted into tokens by the OpenAI API, and may be useful for writing your own function."
"Note that the exact way that messages are converted into tokens may change from model to model. So when future model versions are released, the answers returned by this function may be only approximate. The [ChatML documentation](https://github.com/openai/openai-python/blob/main/chatml.md) explains in more detail how the OpenAI API converts messages into tokens."
]
},
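For concreteness, here is a minimal worked sketch of the per-message accounting used by the function below, assuming the `gpt-3.5-turbo-0301` rules (4 framing tokens per message plus 2 reply-priming tokens); the single message is hypothetical:

```python
import tiktoken

# Sketch of the gpt-3.5-turbo-0301 accounting described above (hypothetical message)
encoding = tiktoken.get_encoding("cl100k_base")
message = {"role": "user", "content": "Hello world"}
num_tokens = 4  # per-message framing: <im_start>{role/name}\n{content}<im_end>\n
num_tokens += sum(len(encoding.encode(value)) for value in message.values())  # role + content
num_tokens += 2  # every reply is primed with <im_start>assistant
print(num_tokens)  # should match num_tokens_from_messages([message], "gpt-3.5-turbo-0301")
```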
{
@@ -434,91 +449,103 @@
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
" if model == \"gpt-3.5-turbo-0301\": # note: future models may deviate from this\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += 4 # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n",
" for key, value in message.items():\n",
" num_tokens += len(encoding.encode(value))\n",
" if key == \"name\": # if there's a name, the role is omitted\n",
" num_tokens += -1 # role is always required and always 1 token\n",
" num_tokens += 2 # every reply is primed with <im_start>assistant\n",
" return num_tokens\n",
" if model == \"gpt-3.5-turbo\":\n",
" print(\"Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\")\n",
" elif model == \"gpt-4\":\n",
" print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n",
" elif model == \"gpt-3.5-turbo-0301\":\n",
" tokens_per_message = 4 # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n",
" tokens_per_name = -1 # if there's a name, the role is omitted\n",
" elif model == \"gpt-4-0314\":\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" else:\n",
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not presently implemented for model {model}.\n",
"See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n"
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += tokens_per_message\n",
" for key, value in message.items():\n",
" num_tokens += len(encoding.encode(value))\n",
" if key == \"name\":\n",
" num_tokens += tokens_per_name\n",
" num_tokens += 2 # every reply is primed with <im_start>assistant\n",
" return num_tokens\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful, pattern-following assistant that translates corporate jargon into plain English.\"},\n",
" {\"role\": \"system\", \"name\":\"example_user\", \"content\": \"New synergies will help drive top-line growth.\"},\n",
" {\"role\": \"system\", \"name\": \"example_assistant\", \"content\": \"Things working well together will increase revenue.\"},\n",
" {\"role\": \"system\", \"name\":\"example_user\", \"content\": \"Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.\"},\n",
" {\"role\": \"system\", \"name\": \"example_assistant\", \"content\": \"Let's talk later when we're less busy about how to do better.\"},\n",
" {\"role\": \"user\", \"content\": \"This late pivot means we don't have time to boil the ocean for the client deliverable.\"},\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"126 prompt tokens counted.\n"
"gpt-3.5-turbo-0301\n",
"126 prompt tokens counted by num_tokens_from_messages().\n",
"126 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0314\n",
"128 prompt tokens counted by num_tokens_from_messages().\n",
"128 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
],
"source": [
"# example token count from the function defined above\n",
"model = \"gpt-3.5-turbo-0301\"\n",
"# let's verify the function above matches the OpenAI API response\n",
"\n",
"print(f\"{num_tokens_from_messages(messages, model)} prompt tokens counted.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"126 prompt tokens used.\n"
]
}
],
"source": [
"# example token count from the OpenAI API\n",
"import openai\n",
"\n",
"example_messages = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are a helpful, pattern-following assistant that translates corporate jargon into plain English.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_user\",\n",
" \"content\": \"New synergies will help drive top-line growth.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_assistant\",\n",
" \"content\": \"Things working well together will increase revenue.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_user\",\n",
" \"content\": \"Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_assistant\",\n",
" \"content\": \"Let's talk later when we're less busy about how to do better.\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"This late pivot means we don't have time to boil the ocean for the client deliverable.\",\n",
" },\n",
"]\n",
"\n",
"response = openai.ChatCompletion.create(\n",
" model=model,\n",
" messages=messages,\n",
" temperature=0,\n",
")\n",
"\n",
"print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens used.')\n"
"for model in [\"gpt-3.5-turbo-0301\", \"gpt-4-0314\"]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
" print(f\"{num_tokens_from_messages(example_messages, model)} prompt tokens counted by num_tokens_from_messages().\")\n",
" # example token count from the OpenAI API\n",
" response = openai.ChatCompletion.create(\n",
" model=model,\n",
" messages=example_messages,\n",
" temperature=0,\n",
" max_tokens=1 # we're only counting input tokens here, so let's not waste tokens on the output\n",
" )\n",
" print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens counted by the OpenAI API.')\n",
" print()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

@@ -70,7 +70,7 @@
{
"data": {
"text/plain": [
"<OpenAIObject chat.completion id=chatcmpl-6pjrV9CvZ2ivOSxzZrBdEidUB6xfs at 0x13362cf90> JSON: {\n",
"<OpenAIObject chat.completion id=chatcmpl-6wE0D7QM6dLRUPmN5Vm6YPwF1JNMR at 0x134f0c270> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
@@ -81,14 +81,14 @@
" }\n",
" }\n",
" ],\n",
" \"created\": 1677789041,\n",
" \"id\": \"chatcmpl-6pjrV9CvZ2ivOSxzZrBdEidUB6xfs\",\n",
" \"created\": 1679334869,\n",
" \"id\": \"chatcmpl-6wE0D7QM6dLRUPmN5Vm6YPwF1JNMR\",\n",
" \"model\": \"gpt-3.5-turbo-0301\",\n",
" \"object\": \"chat.completion\",\n",
" \"usage\": {\n",
" \"completion_tokens\": 5,\n",
" \"completion_tokens\": 4,\n",
" \"prompt_tokens\": 38,\n",
" \"total_tokens\": 43\n",
" \"total_tokens\": 42\n",
" }\n",
"}"
]
@@ -179,7 +179,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Ahoy matey! Asynchronous programming be like havin' a crew o' pirates workin' on different tasks at the same time. Ye see, instead o' waitin' for one task to be completed before startin' the next, we can have multiple tasks runnin' at once. It be like havin' me crew hoistin' the sails while others be swabbin' the deck and loadin' the cannons. Each task be workin' independently, but they all be contributin' to the overall success o' the ship. And just like how me crew communicates with each other to make sure everything be runnin' smoothly, asynchronous programming uses callbacks and promises to coordinate the different tasks and make sure they all be finished in the right order. Arrr, it be a powerful tool for any programmer lookin' to optimize their code and make it run faster.\n"
"Ahoy matey! Asynchronous programming be like havin' a crew o' pirates workin' on different tasks at the same time. Ye see, instead o' waitin' for one task to be completed before startin' the next, ye can assign tasks to yer crew and let 'em work on 'em simultaneously. This way, ye can get more done in less time and keep yer ship sailin' smoothly. It be like havin' a lookout keepin' watch while the cook be preparin' the next meal and the navigator be plottin' the course. Each pirate be doin' their own thing, but all workin' together to keep the ship runnin' smoothly. Arrr, that be asynchronous programming in a pirate's tongue!\n"
]
}
],
@@ -210,13 +210,13 @@
"\n",
"Ahoy mateys! Let me tell ye about asynchronous programming, arrr! \n",
"\n",
"Ye see, in the world of programming, sometimes we need to wait for things to happen before we can move on to the next task. But with asynchronous programming, we can keep working on other tasks while we wait for those things to happen. \n",
"Ye see, in the world of programming, sometimes we need to wait for certain tasks to be completed before moving on to the next one. But with asynchronous programming, we can keep the ship sailing while we wait for those tasks to finish. \n",
"\n",
"It's like when we're sailing the high seas and we need to wait for the wind to change direction. We don't just sit there twiddling our thumbs, do we? No, we keep busy with other tasks like repairing the ship or checking the maps. \n",
"It's like having a crewmate scrubbing the deck while another is hoisting the sails. They're both working at the same time, but on different tasks. \n",
"\n",
"In programming, we use something called callbacks or promises to keep track of those things we're waiting for. And while we wait for those callbacks or promises to be fulfilled, we can keep working on other parts of our code. \n",
"In programming, we use something called callbacks or promises to keep track of these tasks. So while one task is waiting for a response from the server, the rest of the code can keep running. \n",
"\n",
"So, me hearties, asynchronous programming is like being a pirate on the high seas, always ready to tackle the next task while we wait for the winds to change. Arrr!\n"
"It's a bit like navigating through a stormy sea. We need to be able to adjust our course and keep moving forward, even when we hit rough waters. And with asynchronous programming, we can do just that, me hearties!\n"
]
}
],
@@ -264,15 +264,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Sure! Fractions are a way of representing a part of a whole. The top number of a fraction is called the numerator, and it represents how many parts of the whole we are talking about. The bottom number is called the denominator, and it represents how many equal parts the whole is divided into.\n",
"Sure! Fractions are a way of representing a part of a whole. The top number of a fraction is called the numerator, and it represents how many parts you have. The bottom number is called the denominator, and it represents how many parts make up the whole. \n",
"\n",
"For example, if we have a pizza that is divided into 8 equal slices, and we take 3 slices, we can represent that as the fraction 3/8. The numerator is 3 because we took 3 slices, and the denominator is 8 because the pizza was divided into 8 slices.\n",
"For example, if you have a pizza that is cut into 8 equal slices, and you have eaten 3 of those slices, you can represent that as a fraction: 3/8. The numerator is 3 because you have eaten 3 slices, and the denominator is 8 because there are 8 slices in total.\n",
"\n",
"To add or subtract fractions, we need to have a common denominator. This means that the denominators of the fractions need to be the same. To do this, we can find the least common multiple (LCM) of the denominators and then convert each fraction to an equivalent fraction with the LCM as the denominator.\n",
"To add or subtract fractions, you need to have a common denominator. This means that the bottom numbers of the fractions need to be the same. For example, if you want to add 1/4 and 2/3, you need to find a common denominator. One way to do this is to multiply the denominators together: 4 x 3 = 12. Then, you need to convert each fraction so that the denominator is 12. To do this, you can multiply the numerator and denominator of each fraction by the same number. For example, to convert 1/4 to have a denominator of 12, you can multiply both the numerator and denominator by 3: 1/4 x 3/3 = 3/12. To convert 2/3 to have a denominator of 12, you can multiply both the numerator and denominator by 4: 2/3 x 4/4 = 8/12. Now that both fractions have a denominator of 12, you can add them together: 3/12 + 8/12 = 11/12.\n",
"\n",
"To multiply fractions, we simply multiply the numerators together and the denominators together. To divide fractions, we multiply the first fraction by the reciprocal of the second fraction (flip the second fraction upside down).\n",
"\n",
"Now, here's a question to check for understanding: If we have a pizza that is divided into 12 equal slices, and we take 4 slices, what is the fraction that represents how much of the pizza we took?\n"
"Do you have any questions about fractions?\n"
]
}
],
@@ -433,7 +431,7 @@
"- the time it takes to generate the response\n",
"- when the reply gets cut off from hitting the maximum token limit (4096 for `gpt-3.5-turbo`)\n",
"\n",
"As of Mar 01, 2023, you can use the following function to count the number of tokens that a list of messages will use."
"You can use the following function to count the number of tokens that a list of messages will use."
]
},
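To illustrate why the count matters, here is a hedged sketch of budgeting a reply against the 4096-token window mentioned above and detecting truncation via `finish_reason`; the message list is hypothetical and the call mirrors the `openai.ChatCompletion.create` usage elsewhere in this notebook:

```python
import openai

# Hypothetical example: reserve the rest of the context window for the reply,
# then check whether the reply was cut off by the token limit.
messages = [{"role": "user", "content": "Summarize asynchronous programming in one sentence."}]
prompt_tokens = num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
max_reply_tokens = 4096 - prompt_tokens  # 4096 is the gpt-3.5-turbo limit noted above
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=messages,
    temperature=0,
    max_tokens=max_reply_tokens,
)
if response["choices"][0]["finish_reason"] == "length":
    print("Reply was cut off by the token limit.")
```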
{
@@ -450,86 +448,101 @@
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
" if model == \"gpt-3.5-turbo-0301\": # note: future models may deviate from this\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += 4 # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n",
" for key, value in message.items():\n",
" num_tokens += len(encoding.encode(value))\n",
" if key == \"name\": # if there's a name, the role is omitted\n",
" num_tokens += -1 # role is always required and always 1 token\n",
" num_tokens += 2 # every reply is primed with <im_start>assistant\n",
" return num_tokens\n",
" if model == \"gpt-3.5-turbo\":\n",
" print(\"Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\")\n",
" elif model == \"gpt-4\":\n",
" print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n",
" elif model == \"gpt-3.5-turbo-0301\":\n",
" tokens_per_message = 4 # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n",
" tokens_per_name = -1 # if there's a name, the role is omitted\n",
" elif model == \"gpt-4-0314\":\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" else:\n",
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not presently implemented for model {model}.\n",
"See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n"
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += tokens_per_message\n",
" for key, value in message.items():\n",
" num_tokens += len(encoding.encode(value))\n",
" if key == \"name\":\n",
" num_tokens += tokens_per_name\n",
" num_tokens += 2 # every reply is primed with <im_start>assistant\n",
" return num_tokens\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful, pattern-following assistant that translates corporate jargon into plain English.\"},\n",
" {\"role\": \"system\", \"name\":\"example_user\", \"content\": \"New synergies will help drive top-line growth.\"},\n",
" {\"role\": \"system\", \"name\": \"example_assistant\", \"content\": \"Things working well together will increase revenue.\"},\n",
" {\"role\": \"system\", \"name\":\"example_user\", \"content\": \"Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.\"},\n",
" {\"role\": \"system\", \"name\": \"example_assistant\", \"content\": \"Let's talk later when we're less busy about how to do better.\"},\n",
" {\"role\": \"user\", \"content\": \"This late pivot means we don't have time to boil the ocean for the client deliverable.\"},\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"126 prompt tokens counted.\n"
]
}
],
"source": [
"# example token count from the function defined above\n",
"print(f\"{num_tokens_from_messages(messages)} prompt tokens counted.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"126 prompt tokens used.\n"
"gpt-3.5-turbo-0301\n",
"126 prompt tokens counted by num_tokens_from_messages().\n",
"126 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0314\n",
"128 prompt tokens counted by num_tokens_from_messages().\n",
"128 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
],
"source": [
"# example token count from the OpenAI API\n",
"response = openai.ChatCompletion.create(\n",
" model=MODEL,\n",
" messages=messages,\n",
" temperature=0,\n",
")\n",
"# let's verify the function above matches the OpenAI API response\n",
"\n",
"example_messages = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are a helpful, pattern-following assistant that translates corporate jargon into plain English.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_user\",\n",
" \"content\": \"New synergies will help drive top-line growth.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_assistant\",\n",
" \"content\": \"Things working well together will increase revenue.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_user\",\n",
" \"content\": \"Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.\",\n",
" },\n",
" {\n",
" \"role\": \"system\",\n",
" \"name\": \"example_assistant\",\n",
" \"content\": \"Let's talk later when we're less busy about how to do better.\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"This late pivot means we don't have time to boil the ocean for the client deliverable.\",\n",
" },\n",
"]\n",
"\n",
"print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens used.')\n"
"for model in [\"gpt-3.5-turbo-0301\", \"gpt-4-0314\"]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
" print(f\"{num_tokens_from_messages(example_messages, model)} prompt tokens counted by num_tokens_from_messages().\")\n",
" # example token count from the OpenAI API\n",
" response = openai.ChatCompletion.create(\n",
" model=model,\n",
" messages=example_messages,\n",
" temperature=0,\n",
" max_tokens=1 # we're only counting input tokens here, so let's not waste tokens on the output\n",
" )\n",
" print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens counted by the OpenAI API.')\n",
" print()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
