From d972e7482ed71d36e12f5b058380a61d516bc6d0 Mon Sep 17 00:00:00 2001
From: Ted Sanders <ted@openai.com>
Date: Fri, 24 Mar 2023 21:28:14 -0700
Subject: [PATCH] updates chat guide with gpt-4 and updates token counting
 function

---
 ...w_to_format_inputs_to_ChatGPT_models.ipynb | 98 ++++++++++---------
 1 file changed, 53 insertions(+), 45 deletions(-)
diff --git a/examples/How_to_format_inputs_to_ChatGPT_models.ipynb b/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
index 59a71293..f72a4859 100644
--- a/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
+++ b/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
@@ -7,9 +7,9 @@
    "source": [
     "# How to format inputs to ChatGPT models\n",
     "\n",
-    "ChatGPT is powered by `gpt-3.5-turbo`, OpenAI's most advanced model.\n",
+    "ChatGPT is powered by `gpt-3.5-turbo` and `gpt-4`, OpenAI's most advanced models.\n",
     "\n",
-    "You can build your own applications with `gpt-3.5-turbo` using the OpenAI API.\n",
+    "You can build your own applications with `gpt-3.5-turbo` or `gpt-4` using the OpenAI API.\n",
     "\n",
     "Chat models take a series of messages as input, and return an AI-written message as output.\n",
     "\n",
@@ -36,7 +36,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,25 +52,27 @@
     "# 2. An example chat API call\n",
     "\n",
     "A chat API call has two required inputs:\n",
-    "- `model`: the name of the model you want to use (e.g., `gpt-3.5-turbo`)\n",
-    "- `messages`: a list of message objects, where each object has at least two fields:\n",
+    "- `model`: the name of the model you want to use (e.g., `gpt-3.5-turbo`, `gpt-4`, `gpt-4-0314`)\n",
+    "- `messages`: a list of message objects, where each object has two required fields:\n",
     "    - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n",
     "    - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n",
     "\n",
-    "Typically, a conversation will start with a system message, followed by alternating user and assistant messages, but you are not required to follow this format.\n",
+    "Messages can also contain an optional `name` field, which gives the messenger a name. E.g., `example-user`, `Alice`, `BlackbeardBot`. Names may not contain spaces.\n",
+    "\n",
+    "Typically, a conversation will start with a system message that tells the assistant how to behave, followed by alternating user and assistant messages, but you are not required to follow this format.\n",
     "\n",
     "Let's look at an example chat API calls to see how the chat format works in practice."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<OpenAIObject chat.completion id=chatcmpl-6wE0D7QM6dLRUPmN5Vm6YPwF1JNMR at 0x134f0c270> JSON: {\n",
+       "<OpenAIObject chat.completion id=chatcmpl-6xpmlDodtW6RwiaMaC1zhLsR8Y1D3 at 0x10dccc900> JSON: {\n",
        "  \"choices\": [\n",
        "    {\n",
        "      \"finish_reason\": \"stop\",\n",
@@ -81,19 +83,19 @@
        "      }\n",
        "    }\n",
        "  ],\n",
-       "  \"created\": 1679334869,\n",
-       "  \"id\": \"chatcmpl-6wE0D7QM6dLRUPmN5Vm6YPwF1JNMR\",\n",
+       "  \"created\": 1679718435,\n",
+       "  \"id\": \"chatcmpl-6xpmlDodtW6RwiaMaC1zhLsR8Y1D3\",\n",
        "  \"model\": \"gpt-3.5-turbo-0301\",\n",
        "  \"object\": \"chat.completion\",\n",
        "  \"usage\": {\n",
-       "    \"completion_tokens\": 4,\n",
-       "    \"prompt_tokens\": 38,\n",
+       "    \"completion_tokens\": 3,\n",
+       "    \"prompt_tokens\": 39,\n",
        "    \"total_tokens\": 42\n",
        "  }\n",
        "}"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -142,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -151,7 +153,7 @@
        "'Orange who?'"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -172,14 +174,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Ahoy matey! Asynchronous programming be like havin' a crew o' pirates workin' on different tasks at the same time. Ye see, instead o' waitin' for one task to be completed before startin' the next, ye can assign tasks to yer crew and let 'em work on 'em simultaneously. This way, ye can get more done in less time and keep yer ship sailin' smoothly. It be like havin' a lookout keepin' watch while the cook be preparin' the next meal and the navigator be plottin' the course. Each pirate be doin' their own thing, but all workin' together to keep the ship runnin' smoothly. Arrr, that be asynchronous programming in a pirate's tongue!\n"
+      "Ahoy matey! Let me tell ye about asynchronous programming, arrr! It be like havin' a crew of sailors workin' on different tasks at the same time. Each sailor be doin' their own job, but they don't wait for the others to finish before movin' on to the next task. They be workin' independently, but still makin' progress towards the same goal.\n",
+      "\n",
+      "In programming, it be the same. Instead of waitin' for one task to finish before startin' the next, we can have multiple tasks runnin' at the same time. This be especially useful when we be dealin' with slow or unpredictable tasks, like fetchin' data from a server or readin' from a file. We don't want our program to be stuck waitin' for these tasks to finish, so we can use asynchronous programming to keep things movin' along.\n",
+      "\n",
+      "So, me hearty, asynchronous programming be like havin' a crew of sailors workin' independently towards the same goal. It be a powerful tool in the programmer's arsenal, and one that can help us build faster and more efficient programs. Arrr!\n"
      ]
     }
    ],
@@ -199,24 +205,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
-      "\n",
       "Ahoy mateys! Let me tell ye about asynchronous programming, arrr! \n",
       "\n",
-      "Ye see, in the world of programming, sometimes we need to wait for certain tasks to be completed before moving on to the next one. But with asynchronous programming, we can keep the ship sailing while we wait for those tasks to finish. \n",
+      "Ye see, in the world of programming, sometimes we need to wait for things to happen before we can move on to the next task. But with asynchronous programming, we can keep working on other tasks while we wait for those things to happen. \n",
       "\n",
-      "It's like having a crewmate scrubbing the deck while another is hoisting the sails. They're both working at the same time, but on different tasks. \n",
+      "It's like when we're sailing the high seas and we need to wait for the wind to change direction. We don't just sit there twiddling our thumbs, no sir! We keep busy with other tasks like repairing the ship or checking the maps. \n",
       "\n",
-      "In programming, we use something called callbacks or promises to keep track of these tasks. So while one task is waiting for a response from the server, the rest of the code can keep running. \n",
+      "In programming, we use something called callbacks or promises to keep track of those things we're waiting for. And while we wait for those callbacks or promises to be fulfilled, we can keep working on other parts of our code. \n",
       "\n",
-      "It's a bit like navigating through a stormy sea. We need to be able to adjust our course and keep moving forward, even when we hit rough waters. And with asynchronous programming, we can do just that, me hearties!\n"
+      "So, me hearties, asynchronous programming is like being a pirate on the high seas - always busy with tasks and never wasting a moment! Arrr!\n"
      ]
     }
    ],
@@ -252,25 +256,25 @@
     "\n",
     "The system message can be used to prime the assistant with different personalities or behaviors.\n",
     "\n",
-    "However, the model does not generally pay as much attention to the system message, and therefore we recommend placing important instructions in the user message instead."
+    "Be aware that `gpt-3.5-turbo-0301` does not generally pay as much attention to the system message as `gpt-4-0314`. Therefore, for `gpt-3.5-turbo-0301`, we recommend placing important instructions in the user message instead. Some developers have found success in continually moving the system message near the end of the conversation to keep the model's attention from drifting away as conversations get longer."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Sure! Fractions are a way of representing a part of a whole. The top number of a fraction is called the numerator, and it represents how many parts you have. The bottom number is called the denominator, and it represents how many parts make up the whole. \n",
+      "Sure! Fractions are a way of representing a part of a whole. The top number of a fraction is called the numerator, and it represents how many parts of the whole we are talking about. The bottom number is called the denominator, and it represents how many equal parts the whole is divided into.\n",
       "\n",
-      "For example, if you have a pizza that is cut into 8 equal slices, and you have eaten 3 of those slices, you can represent that as a fraction: 3/8. The numerator is 3 because you have eaten 3 slices, and the denominator is 8 because there are 8 slices in total.\n",
+      "For example, if we have a pizza that is divided into 8 equal slices, and we have eaten 3 of those slices, we can represent that as the fraction 3/8. The numerator is 3 because we have eaten 3 slices, and the denominator is 8 because the pizza is divided into 8 slices.\n",
       "\n",
-      "To add or subtract fractions, you need to have a common denominator. This means that the bottom numbers of the fractions need to be the same. For example, if you want to add 1/4 and 2/3, you need to find a common denominator. One way to do this is to multiply the denominators together: 4 x 3 = 12. Then, you need to convert each fraction so that the denominator is 12. To do this, you can multiply the numerator and denominator of each fraction by the same number. For example, to convert 1/4 to have a denominator of 12, you can multiply both the numerator and denominator by 3: 1/4 x 3/3 = 3/12. To convert 2/3 to have a denominator of 12, you can multiply both the numerator and denominator by 4: 2/3 x 4/4 = 8/12. Now that both fractions have a denominator of 12, you can add them together: 3/12 + 8/12 = 11/12.\n",
+      "To add or subtract fractions, we need to have a common denominator. This means that we need to find a number that both denominators can divide into evenly. For example, if we want to add 1/4 and 2/3, we need to find a common denominator. We can do this by multiplying the denominators together, which gives us 12. Then, we can convert both fractions to have a denominator of 12. To do this, we multiply the numerator and denominator of 1/4 by 3, which gives us 3/12. We multiply the numerator and denominator of 2/3 by 4, which gives us 8/12. Now we can add the two fractions together, which gives us 11/12.\n",
       "\n",
-      "Do you have any questions about fractions?\n"
+      "Does that make sense? Do you have any questions?\n"
      ]
     }
    ],
@@ -290,7 +294,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -331,14 +335,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "We don't have enough time to complete everything perfectly for the client.\n"
+      "We don't have enough time to complete the entire project perfectly.\n"
      ]
     }
    ],
@@ -367,14 +371,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To help clarify that the example messages are not part of a real conversation, and shouldn't be referred back to by the model, you can instead set the `name` field of `system` messages to `example_user` and `example_assistant`.\n",
+    "To help clarify that the example messages are not part of a real conversation, and shouldn't be referred back to by the model, you can try setting the `name` field of `system` messages to `example_user` and `example_assistant`.\n",
     "\n",
     "Transforming the few-shot example above, we could write:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -429,14 +433,18 @@
     "The number of tokens used affects:\n",
     "- the cost of the request\n",
     "- the time it takes to generate the response\n",
-    "- when the reply gets cut off from hitting the maximum token limit (4096 for `gpt-3.5-turbo`)\n",
+    "- when the reply gets cut off from hitting the maximum token limit (4,096 for `gpt-3.5-turbo` or 8,192 for `gpt-4`)\n",
     "\n",
-    "You can use the following function to count the number of tokens that a list of messages will use."
+    "You can use the following function to count the number of tokens that a list of messages will use.\n",
+    "\n",
+    "Note that the exact way that messages are converted into tokens may change from model to model, and may even change over time for the same model. Therefore, the counts returned by the function below should be considered an estimate, not a guarantee.\n",
+    "\n",
+    "Read more about counting tokens in [How to count tokens with tiktoken](How_to_count_tokens_with_tiktoken.ipynb)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -457,7 +465,7 @@
     "        print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n",
     "        return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n",
     "    elif model == \"gpt-3.5-turbo-0301\":\n",
-    "        tokens_per_message = 4  # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n",
+    "        tokens_per_message = 4  # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n",
     "        tokens_per_name = -1  # if there's a name, the role is omitted\n",
     "    elif model == \"gpt-4-0314\":\n",
     "        tokens_per_message = 3\n",
@@ -471,13 +479,13 @@
     "            num_tokens += len(encoding.encode(value))\n",
     "            if key == \"name\":\n",
     "                num_tokens += tokens_per_name\n",
-    "    num_tokens += 2  # every reply is primed with <im_start>assistant\n",
+    "    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>\n",
     "    return num_tokens\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -485,12 +493,12 @@
      "output_type": "stream",
      "text": [
       "gpt-3.5-turbo-0301\n",
-      "126 prompt tokens counted by num_tokens_from_messages().\n",
-      "126 prompt tokens counted by the OpenAI API.\n",
+      "127 prompt tokens counted by num_tokens_from_messages().\n",
+      "127 prompt tokens counted by the OpenAI API.\n",
       "\n",
       "gpt-4-0314\n",
-      "128 prompt tokens counted by num_tokens_from_messages().\n",
-      "128 prompt tokens counted by the OpenAI API.\n",
+      "129 prompt tokens counted by num_tokens_from_messages().\n",
+      "129 prompt tokens counted by the OpenAI API.\n",
       "\n"
      ]
     }