Ted/update token counting 0613 (#541)

* updates tiktoken counting function

* slight rewording

* updates ChatGPT guide with new models
Ted Sanders 1 year ago committed by GitHub
parent 26978e9ed1
commit de3bd58434

@ -66,24 +66,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: tiktoken in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (0.3.2)\n",
"Requirement already satisfied: regex>=2022.1.18 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2022.10.31)\n",
"Requirement already satisfied: requests>=2.26.0 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2.28.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2.0.9)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2021.10.8)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ted/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (1.26.7)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install --upgrade tiktoken"
]
@ -97,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -117,7 +102,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -133,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -157,7 +142,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -166,7 +151,7 @@
"[83, 1609, 5963, 374, 2294, 0]"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -184,7 +169,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -197,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -206,7 +191,7 @@
"6"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -231,7 +216,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -240,7 +225,7 @@
"'tiktoken is great!'"
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -265,7 +250,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -274,7 +259,7 @@
"[b't', b'ik', b'token', b' is', b' great', b'!']"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -301,7 +286,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -324,7 +309,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
@ -354,7 +339,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@ -384,7 +369,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@ -413,48 +398,57 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Counting tokens for chat API calls\n",
"## 6. Counting tokens for chat completions API calls\n",
"\n",
"ChatGPT models like `gpt-3.5-turbo` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
"\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo-0301` or `gpt-4-0314`.\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo` or `gpt-4`.\n",
"\n",
"Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee.\n",
"\n",
"Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee."
"In particular, requests that use the optional functions input will consume extra tokens on top of the estimates calculated below."
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\"):\n",
" \"\"\"Returns the number of tokens used by a list of messages.\"\"\"\n",
"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\"):\n",
" \"\"\"Return the number of tokens used by a list of messages.\"\"\"\n",
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
" if model == \"gpt-3.5-turbo\":\n",
" print(\"Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\")\n",
" elif model == \"gpt-3.5-turbo-16k\":\n",
" print(\"Warning: gpt-3.5-turbo-16k may change over time. Returning num tokens assuming gpt-3.5-turbo-16k-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-16k-0613\")\n",
" elif model == \"gpt-4\":\n",
" print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n",
" if model in {\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo-16k-0613\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-32k-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4-32k-0613\",\n",
" }:\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" elif model == \"gpt-3.5-turbo-0301\":\n",
" tokens_per_message = 4 # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n",
" tokens_per_name = -1 # if there's a name, the role is omitted\n",
" elif model in {\"gpt-4-0314\", \"gpt-3.5-turbo-0613\", \"gpt-3.5-turbo-16k-0613\"}:\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" elif \"gpt-3.5-turbo\" in model:\n",
" print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\")\n",
" elif \"gpt-4\" in model:\n",
" print(\"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0613\")\n",
" else:\n",
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n",
" raise NotImplementedError(\n",
" f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\"\n",
" )\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += tokens_per_message\n",
@ -468,7 +462,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@ -479,9 +473,27 @@
"127 prompt tokens counted by num_tokens_from_messages().\n",
"127 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo-0613\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo\n",
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"127 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0314\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0613\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4\n",
"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
@ -522,7 +534,14 @@
" },\n",
"]\n",
"\n",
"for model in [\"gpt-3.5-turbo-0301\", \"gpt-4-0314\"]:\n",
"for model in [\n",
" \"gpt-3.5-turbo-0301\",\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4\",\n",
" ]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
" print(f\"{num_tokens_from_messages(example_messages, model)} prompt tokens counted by num_tokens_from_messages().\")\n",
@ -531,7 +550,7 @@
" model=model,\n",
" messages=example_messages,\n",
" temperature=0,\n",
" max_tokens=1 # we're only counting input tokens here, so let's not waste tokens on the output\n",
" max_tokens=1, # we're only counting input tokens here, so let's not waste tokens on the output\n",
" )\n",
" print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens counted by the OpenAI API.')\n",
" print()\n"

@ -36,7 +36,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -52,13 +52,15 @@
"# 2. An example chat API call\n",
"\n",
"A chat API call has two required inputs:\n",
"- `model`: the name of the model you want to use (e.g., `gpt-3.5-turbo`, `gpt-4`, `gpt-4-0314`)\n",
"- `model`: the name of the model you want to use (e.g., `gpt-3.5-turbo`, `gpt-4`, `gpt-3.5-turbo-0613`, `gpt-3.5-turbo-16k-0613`)\n",
"- `messages`: a list of message objects, where each object has two required fields:\n",
" - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n",
" - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n",
"\n",
"Messages can also contain an optional `name` field, which give the messenger a name. E.g., `example-user`, `Alice`, `BlackbeardBot`. Names may not contain spaces.\n",
"\n",
"As of June 2023, you can also optionally submit a list of `functions` that tell GPT whether it can generate JSON to feed into a function. For details, see the [documentation](https://platform.openai.com/docs/guides/gpt/function-calling), [API reference](https://platform.openai.com/docs/api-reference/chat), or the Cookbook guide [How to call functions with chat models](How_to_call_functions_with_chat_models.ipynb).\n",
"\n",
"Typically, a conversation will start with a system message that tells the assistant how to behave, followed by alternating user and assistant messages, but you are not required to follow this format.\n",
"\n",
"Let's look at an example chat API calls to see how the chat format works in practice."
@ -66,36 +68,36 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject chat.completion id=chatcmpl-6xpmlDodtW6RwiaMaC1zhLsR8Y1D3 at 0x10dccc900> JSON: {\n",
"<OpenAIObject chat.completion id=chatcmpl-7UkgnSDzlevZxiy0YjZcLYdUMz5yZ at 0x118e394f0> JSON: {\n",
" \"id\": \"chatcmpl-7UkgnSDzlevZxiy0YjZcLYdUMz5yZ\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1687563669,\n",
" \"model\": \"gpt-3.5-turbo-0301\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"Orange who?\",\n",
" \"role\": \"assistant\"\n",
" }\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Orange who?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"created\": 1679718435,\n",
" \"id\": \"chatcmpl-6xpmlDodtW6RwiaMaC1zhLsR8Y1D3\",\n",
" \"model\": \"gpt-3.5-turbo-0301\",\n",
" \"object\": \"chat.completion\",\n",
" \"usage\": {\n",
" \"completion_tokens\": 3,\n",
" \"prompt_tokens\": 39,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 42\n",
" }\n",
"}"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -144,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -153,7 +155,7 @@
"'Orange who?'"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -174,18 +176,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ahoy matey! Let me tell ye about asynchronous programming, arrr! It be like havin' a crew of sailors workin' on different tasks at the same time. Each sailor be doin' their own job, but they don't wait for the others to finish before movin' on to the next task. They be workin' independently, but still makin' progress towards the same goal.\n",
"\n",
"In programming, it be the same. Instead of waitin' for one task to finish before startin' the next, we can have multiple tasks runnin' at the same time. This be especially useful when we be dealin' with slow or unpredictable tasks, like fetchin' data from a server or readin' from a file. We don't want our program to be stuck waitin' for these tasks to finish, so we can use asynchronous programming to keep things movin' along.\n",
"\n",
"So, me hearty, asynchronous programming be like havin' a crew of sailors workin' independently towards the same goal. It be a powerful tool in the programmer's arsenal, and one that can help us build faster and more efficient programs. Arrr!\n"
"Ahoy matey! Asynchronous programming be like havin' a crew o' pirates workin' on different tasks at the same time. Ye see, instead o' waitin' for one task to be completed before startin' the next, ye can assign tasks to yer crew and let 'em work on 'em simultaneously. This way, ye can get more done in less time and keep yer ship sailin' smoothly. It be like havin' a bunch o' pirates rowin' the ship at different speeds, but still gettin' us to our destination. Arrr!\n"
]
}
],
@ -205,22 +203,20 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ahoy mateys! Let me tell ye about asynchronous programming, arrr! \n",
"Ahoy mateys! Let me tell ye about asynchronous programming, arrr! It be like havin' a crew of sailors workin' on different tasks at the same time, without waitin' for each other to finish. Ye see, in traditional programming, ye have to wait for one task to be completed before movin' on to the next. But with asynchronous programming, ye can start multiple tasks at once and let them run in the background while ye focus on other things.\n",
"\n",
"Ye see, in the world of programming, sometimes we need to wait for things to happen before we can move on to the next task. But with asynchronous programming, we can keep working on other tasks while we wait for those things to happen. \n",
"It be like havin' a lookout keepin' watch for enemy ships while the rest of the crew be busy with their own tasks. They don't have to stop what they're doin' to keep an eye out, because the lookout be doin' it for them. And when the lookout spots an enemy ship, they can alert the crew and everyone can work together to defend the ship.\n",
"\n",
"It's like when we're sailing the high seas and we need to wait for the wind to change direction. We don't just sit there twiddling our thumbs, no sir! We keep busy with other tasks like repairing the ship or checking the maps. \n",
"In the same way, asynchronous programming allows different parts of yer code to work together without gettin' in each other's way. It be especially useful for tasks that take a long time to complete, like loadin' large files or connectin' to a server. Instead of makin' yer program wait for these tasks to finish, ye can let them run in the background while yer program continues to do other things.\n",
"\n",
"In programming, we use something called callbacks or promises to keep track of those things we're waiting for. And while we wait for those callbacks or promises to be fulfilled, we can keep working on other parts of our code. \n",
"\n",
"So, me hearties, asynchronous programming is like being a pirate on the high seas - always busy with tasks and never wasting a moment! Arrr!\n"
"So there ye have it, me hearties! Asynchronous programming be like havin' a crew of sailors workin' together without gettin' in each other's way. It be a powerful tool for any programmer, and one that can help ye sail the seas of code with ease!\n"
]
}
],
@ -256,12 +252,12 @@
"\n",
"The system message can be used to prime the assistant with different personalities or behaviors.\n",
"\n",
"Be aware that `gpt-3.5-turbo-0301` does not generally pay as much attention to the system message as `gpt-4-0314`. Therefore, for `gpt-3.5-turbo-0301`, we recommend placing important instructions in the user message instead. Some developers have found success in continually moving the system message near the end of the conversation to keep the model's attention from drifting away as conversations get longer."
"Be aware that `gpt-3.5-turbo-0301` does not generally pay as much attention to the system message as `gpt-4-0314` or `gpt-3.5-turbo-0613`. Therefore, for `gpt-3.5-turbo-0301`, we recommend placing important instructions in the user message instead. Some developers have found success in continually moving the system message near the end of the conversation to keep the model's attention from drifting away as conversations get longer."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
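
One way to apply the advice above is to rebuild the message list each turn so the system message sits just before the latest user message; this is a purely illustrative sketch, not code from the guide:

    # hypothetical helper: keep the system message near the end of the conversation
    def with_system_message_near_end(system_message, conversation):
        """Return a message list with the system message placed just before the latest turn."""
        return conversation[:-1] + [system_message, conversation[-1]]

    system_message = {"role": "system", "content": "You are a patient, encouraging math tutor."}
    conversation = [
        {"role": "user", "content": "Can you explain how fractions work?"},
        {"role": "assistant", "content": "Sure! A fraction represents part of a whole..."},
        {"role": "user", "content": "Now explain decimals."},
    ]
    messages = with_system_message_near_end(system_message, conversation)
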
@ -270,11 +266,13 @@
"text": [
"Sure! Fractions are a way of representing a part of a whole. The top number of a fraction is called the numerator, and it represents how many parts of the whole we are talking about. The bottom number is called the denominator, and it represents how many equal parts the whole is divided into.\n",
"\n",
"For example, if we have a pizza that is divided into 8 equal slices, and we have eaten 3 of those slices, we can represent that as the fraction 3/8. The numerator is 3 because we have eaten 3 slices, and the denominator is 8 because the pizza is divided into 8 slices.\n",
"For example, if we have a pizza that is divided into 8 equal slices, and we take 3 slices, we can represent that as the fraction 3/8. The numerator is 3 because we took 3 slices, and the denominator is 8 because the pizza was divided into 8 slices.\n",
"\n",
"To add or subtract fractions, we need to have a common denominator. This means that we need to find a number that both denominators can divide into evenly. For example, if we want to add 1/4 and 2/3, we need to find a common denominator. We can do this by multiplying the denominators together, which gives us 12. Then, we can convert both fractions to have a denominator of 12. To do this, we multiply the numerator and denominator of 1/4 by 3, which gives us 3/12. We multiply the numerator and denominator of 2/3 by 4, which gives us 8/12. Now we can add the two fractions together, which gives us 11/12.\n",
"To add or subtract fractions, we need to have a common denominator. This means that the denominators of the fractions need to be the same. To do this, we can find the least common multiple (LCM) of the denominators and then convert each fraction to an equivalent fraction with the LCM as the denominator.\n",
"\n",
"Does that make sense? Do you have any questions?\n"
"To multiply fractions, we simply multiply the numerators together and the denominators together. To divide fractions, we multiply the first fraction by the reciprocal of the second fraction (flip the second fraction upside down).\n",
"\n",
"Now, here's a question to check for understanding: If we have a pizza that is divided into 12 equal slices, and we take 4 slices, what is the fraction that represents how much of the pizza we took?\n"
]
}
],
@ -294,7 +292,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -335,7 +333,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -378,7 +376,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -437,41 +435,52 @@
"\n",
"You can use the following function to count the number of tokens that a list of messages will use.\n",
"\n",
"Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee.\n",
"Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee. \n",
"\n",
"In particular, requests that use the optional functions input will consume extra tokens on top of the estimates calculated below.\n",
"\n",
"Read more about counting tokens in [How to count tokens with tiktoken](How_to_count_tokens_with_tiktoken.ipynb)."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import tiktoken\n",
"\n",
"\n",
"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\"):\n",
" \"\"\"Returns the number of tokens used by a list of messages.\"\"\"\n",
"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\"):\n",
" \"\"\"Return the number of tokens used by a list of messages.\"\"\"\n",
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
" if model == \"gpt-3.5-turbo\":\n",
" print(\"Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\")\n",
" elif model == \"gpt-4\":\n",
" print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n",
" if model in {\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo-16k-0613\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-32k-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4-32k-0613\",\n",
" }:\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" elif model == \"gpt-3.5-turbo-0301\":\n",
" tokens_per_message = 4 # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n",
" tokens_per_name = -1 # if there's a name, the role is omitted\n",
" elif model == \"gpt-4-0314\":\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" elif \"gpt-3.5-turbo\" in model:\n",
" print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\")\n",
" elif \"gpt-4\" in model:\n",
" print(\"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0613\")\n",
" else:\n",
" raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n",
" raise NotImplementedError(\n",
" f\"\"\"num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\"\n",
" )\n",
" num_tokens = 0\n",
" for message in messages:\n",
" num_tokens += tokens_per_message\n",
@ -485,7 +494,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@ -496,9 +505,27 @@
"127 prompt tokens counted by num_tokens_from_messages().\n",
"127 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo-0613\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo\n",
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"127 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0314\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0613\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4\n",
"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
@ -506,6 +533,8 @@
"source": [
"# let's verify the function above matches the OpenAI API response\n",
"\n",
"import openai\n",
"\n",
"example_messages = [\n",
" {\n",
" \"role\": \"system\",\n",
@ -537,7 +566,14 @@
" },\n",
"]\n",
"\n",
"for model in [\"gpt-3.5-turbo-0301\", \"gpt-4-0314\"]:\n",
"for model in [\n",
" \"gpt-3.5-turbo-0301\",\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4\",\n",
" ]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
" print(f\"{num_tokens_from_messages(example_messages, model)} prompt tokens counted by num_tokens_from_messages().\")\n",
@ -546,7 +582,7 @@
" model=model,\n",
" messages=example_messages,\n",
" temperature=0,\n",
" max_tokens=1 # we're only counting input tokens here, so let's not waste tokens on the output\n",
" max_tokens=1, # we're only counting input tokens here, so let's not waste tokens on the output\n",
" )\n",
" print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens counted by the OpenAI API.')\n",
" print()\n"
