Fix styling for using_logprobs cookbook (#947)

pull/948/head
Shyamal H Anadkat 5 months ago committed by GitHub
parent 1abc529895
commit 228cde1c46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -41,22 +41,22 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 264,
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"from math import exp\n",
"import numpy as np\n",
"from colorama import init, Fore\n",
"from IPython.display import display, HTML\n",
"\n",
"\n",
"client = OpenAI()\n"
"client = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 265,
"metadata": {},
"outputs": [],
"source": [
@ -85,7 +85,7 @@
" params[\"tools\"] = tools\n",
"\n",
" completion = client.chat.completions.create(**params)\n",
" return completion\n"
" return completion"
]
},
{
@ -113,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 266,
"metadata": {},
"outputs": [],
"source": [
@ -133,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 267,
"metadata": {},
"outputs": [],
"source": [
@ -146,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 268,
"metadata": {},
"outputs": [
{
@ -175,7 +175,7 @@
" [{\"role\": \"user\", \"content\": CLASSIFICATION_PROMPT.format(headline=headline)}],\n",
" model=\"gpt-4\",\n",
" )\n",
" print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")\n"
" print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")"
]
},
{
@ -187,7 +187,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 269,
"metadata": {},
"outputs": [
{
@ -195,21 +195,69 @@
"output_type": "stream",
"text": [
"\n",
"Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n",
"\u001b[36mOutput token 1:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -1.9816675e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n",
"\u001b[36mOutput token 2:\u001b[39m Techn, \u001b[33mlogprobs:\u001b[39m -14.062502, \u001b[35mlinear probability:\u001b[39m 0.0%\n",
"Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n"
]
},
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Technology, <span style='color: darkorange'>logprobs:</span> -2.4584822e-06, <span style='color: magenta'>linear probability:</span> 100.0%<br><span style='color: cyan'>Output token 2:</span> Techn, <span style='color: darkorange'>logprobs:</span> -13.781253, <span style='color: magenta'>linear probability:</span> 0.0%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\n",
"Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n",
"\u001b[36mOutput token 1:\u001b[39m Politics, \u001b[33mlogprobs:\u001b[39m -3.650519e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n",
"\u001b[36mOutput token 2:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -13.015629, \u001b[35mlinear probability:\u001b[39m 0.0%\n",
"Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n"
]
},
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Politics, <span style='color: darkorange'>logprobs:</span> -2.4584822e-06, <span style='color: magenta'>linear probability:</span> 100.0%<br><span style='color: cyan'>Output token 2:</span> Technology, <span style='color: darkorange'>logprobs:</span> -13.937503, <span style='color: magenta'>linear probability:</span> 0.0%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\n",
"Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n",
"\u001b[36mOutput token 1:\u001b[39m Art, \u001b[33mlogprobs:\u001b[39m -0.19579042, \u001b[35mlinear probability:\u001b[39m 82.22%\n",
"\u001b[36mOutput token 2:\u001b[39m Sports, \u001b[33mlogprobs:\u001b[39m -1.7270404, \u001b[35mlinear probability:\u001b[39m 17.78%\n",
"Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n"
]
},
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Art, <span style='color: darkorange'>logprobs:</span> -0.009169078, <span style='color: magenta'>linear probability:</span> 99.09%<br><span style='color: cyan'>Output token 2:</span> Sports, <span style='color: darkorange'>logprobs:</span> -4.696669, <span style='color: magenta'>linear probability:</span> 0.91%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
@ -225,13 +273,15 @@
" top_logprobs=2,\n",
" )\n",
" top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs\n",
" html_content = \"\"\n",
" for i, logprob in enumerate(top_two_logprobs, start=1):\n",
" print(\n",
" f\"{Fore.CYAN}Output token {i}:{Fore.RESET} {logprob.token}, \"\n",
" f\"{Fore.YELLOW}logprobs:{Fore.RESET} {logprob.logprob}, \"\n",
" f\"{Fore.MAGENTA}linear probability:{Fore.RESET} {np.round(np.exp(logprob.logprob)*100,2)}%\"\n",
" html_content += (\n",
" f\"<span style='color: cyan'>Output token {i}:</span> {logprob.token}, \"\n",
" f\"<span style='color: darkorange'>logprobs:</span> {logprob.logprob}, \"\n",
" f\"<span style='color: magenta'>linear probability:</span> {np.round(np.exp(logprob.logprob)*100,2)}%<br>\"\n",
" )\n",
" print(\"\\n\")\n"
" display(HTML(html_content))\n",
" print(\"\\n\")"
]
},
{
@ -266,7 +316,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 270,
"metadata": {},
"outputs": [],
"source": [
@ -289,7 +339,7 @@
"medium_questions = [\n",
" \"Did Lovelace collaborate with Charles Dickens\",\n",
" \"What concepts did Lovelace build with Charles Babbage\",\n",
"]\n"
"]"
]
},
{
@ -301,7 +351,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 271,
"metadata": {},
"outputs": [],
"source": [
@ -314,36 +364,25 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 272,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Questions clearly answered in article\n",
"\n",
"\u001b[32mQuestion:\u001b[39m What nationality was Ada Lovelace?\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -3.1281633e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n",
"\n",
"\u001b[32mQuestion:\u001b[39m What was an important finding from Lovelace's seventh note?\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -6.704273e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n",
"\n",
"\n",
"\n",
"Questions only partially covered in the article\n",
"\n",
"\u001b[32mQuestion:\u001b[39m Did Lovelace collaborate with Charles Dickens\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.07655343, \u001b[35mlinear probability:\u001b[39m 92.63% \n",
"\n",
"\u001b[32mQuestion:\u001b[39m What concepts did Lovelace build with Charles Babbage\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.0699371, \u001b[35mlinear probability:\u001b[39m 93.25% \n",
"\n"
]
"data": {
"text/html": [
"Questions clearly answered in article<p style=\"color:green\">Question: What nationality was Ada Lovelace?</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -3.1281633e-07, <span style=\"color:magenta\">linear probability: 100.0%</span></p><p style=\"color:green\">Question: What was an important finding from Lovelace's seventh note?</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -7.89631e-07, <span style=\"color:magenta\">linear probability: 100.0%</span></p>Questions only partially covered in the article<p style=\"color:green\">Question: Did Lovelace collaborate with Charles Dickens</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -0.06993677, <span style=\"color:magenta\">linear probability: 93.25%</span></p><p style=\"color:green\">Question: What concepts did Lovelace build with Charles Babbage</p><p style=\"color:cyan\">has_sufficient_context_for_answer: False, <span style=\"color:darkorange\">logprobs: -0.61807257, <span style=\"color:magenta\">linear probability: 53.9%</span></p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(\"Questions clearly answered in article\" + \"\\n\")\n",
"html_output = \"\"\n",
"html_output += \"Questions clearly answered in article\"\n",
"\n",
"for question in easy_questions:\n",
" API_RESPONSE = get_completion(\n",
@ -358,14 +397,11 @@
" model=\"gpt-4\",\n",
" logprobs=True,\n",
" )\n",
" print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n",
" html_output += f'<p style=\"color:green\">Question: {question}</p>'\n",
" for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
" print(\n",
" Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n",
" \"\\n\",\n",
" )\n",
" html_output += f'<p style=\"color:cyan\">has_sufficient_context_for_answer: {logprob.token}, <span style=\"color:darkorange\">logprobs: {logprob.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</span></p>'\n",
"\n",
"print(\"\\n\\n\" + \"Questions only partially covered in the article\" + \"\\n\")\n",
"html_output += \"Questions only partially covered in the article\"\n",
"\n",
"for question in medium_questions:\n",
" API_RESPONSE = get_completion(\n",
@ -381,12 +417,11 @@
" logprobs=True,\n",
" top_logprobs=3,\n",
" )\n",
" print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n",
" html_output += f'<p style=\"color:green\">Question: {question}</p>'\n",
" for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
" print(\n",
" Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n",
" \"\\n\",\n",
" )\n"
" html_output += f'<p style=\"color:cyan\">has_sufficient_context_for_answer: {logprob.token}, <span style=\"color:darkorange\">logprobs: {logprob.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</span></p>'\n",
"\n",
"display(HTML(html_output))"
]
},
{
@ -421,7 +456,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 273,
"metadata": {},
"outputs": [],
"source": [
@ -433,7 +468,7 @@
" \"My least favorite TV show\",\n",
" \"My least favorite TV show is\",\n",
" \"My least favorite TV show is Breaking Bad\",\n",
"]\n"
"]"
]
},
{
@ -445,61 +480,26 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 274,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[36mSentence:\u001b[39m My\n",
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.18245785, \u001b[35mlinear probability:\u001b[39m 83.32%\n",
"\u001b[36mPredicted next token:\u001b[39m dog, \u001b[33mlogprobs:\u001b[39m -2.397172, \u001b[35mlinear probability:\u001b[39m 9.1%\n",
"\u001b[36mPredicted next token:\u001b[39m ap, \u001b[33mlogprobs:\u001b[39m -3.8732424, \u001b[35mlinear probability:\u001b[39m 2.08%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least\n",
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.01722952, \u001b[35mlinear probability:\u001b[39m 98.29%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -4.079079, \u001b[35mlinear probability:\u001b[39m 1.69%\n",
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -9.6813755, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite\n",
"\u001b[36mPredicted next token:\u001b[39m food, \u001b[33mlogprobs:\u001b[39m -0.9481721, \u001b[35mlinear probability:\u001b[39m 38.74%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -1.3447137, \u001b[35mlinear probability:\u001b[39m 26.06%\n",
"\u001b[36mPredicted next token:\u001b[39m color, \u001b[33mlogprobs:\u001b[39m -1.3887696, \u001b[35mlinear probability:\u001b[39m 24.94%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV\n",
"\u001b[36mPredicted next token:\u001b[39m show, \u001b[33mlogprobs:\u001b[39m -0.0007898556, \u001b[35mlinear probability:\u001b[39m 99.92%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -7.711523, \u001b[35mlinear probability:\u001b[39m 0.04%\n",
"\u001b[36mPredicted next token:\u001b[39m series, \u001b[33mlogprobs:\u001b[39m -9.348547, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show\n",
"\u001b[36mPredicted next token:\u001b[39m is, \u001b[33mlogprobs:\u001b[39m -0.18602066, \u001b[35mlinear probability:\u001b[39m 83.03%\n",
"\u001b[36mPredicted next token:\u001b[39m of, \u001b[33mlogprobs:\u001b[39m -2.0780265, \u001b[35mlinear probability:\u001b[39m 12.52%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -3.271426, \u001b[35mlinear probability:\u001b[39m 3.8%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show is\n",
"\u001b[36mPredicted next token:\u001b[39m \"My, \u001b[33mlogprobs:\u001b[39m -0.77423567, \u001b[35mlinear probability:\u001b[39m 46.11%\n",
"\u001b[36mPredicted next token:\u001b[39m \"The, \u001b[33mlogprobs:\u001b[39m -1.2854586, \u001b[35mlinear probability:\u001b[39m 27.65%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -2.2629042, \u001b[35mlinear probability:\u001b[39m 10.4%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show is Breaking Bad\n",
"\u001b[36mPredicted next token:\u001b[39m because, \u001b[33mlogprobs:\u001b[39m -0.16519119, \u001b[35mlinear probability:\u001b[39m 84.77%\n",
"\u001b[36mPredicted next token:\u001b[39m ,, \u001b[33mlogprobs:\u001b[39m -2.430881, \u001b[35mlinear probability:\u001b[39m 8.8%\n",
"\u001b[36mPredicted next token:\u001b[39m ., \u001b[33mlogprobs:\u001b[39m -3.2097907, \u001b[35mlinear probability:\u001b[39m 4.04%\n",
"\n",
"\n"
]
"data": {
"text/html": [
"<p>Sentence: My</p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -0.18245785, <span style=\"color:magenta\">linear probability: 83.32%</span></p><p style=\"color:cyan\">Predicted next token: dog, <span style=\"color:darkorange\">logprobs: -2.397172, <span style=\"color:magenta\">linear probability: 9.1%</span></p><p style=\"color:cyan\">Predicted next token: ap, <span style=\"color:darkorange\">logprobs: -3.8732424, <span style=\"color:magenta\">linear probability: 2.08%</span></p><br><p>Sentence: My least</p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -0.0146376295, <span style=\"color:magenta\">linear probability: 98.55%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -4.2417912, <span style=\"color:magenta\">linear probability: 1.44%</span></p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -9.748788, <span style=\"color:magenta\">linear probability: 0.01%</span></p><br><p>Sentence: My least favorite</p><p style=\"color:cyan\">Predicted next token: food, <span style=\"color:darkorange\">logprobs: -0.9481721, <span style=\"color:magenta\">linear probability: 38.74%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -1.3447137, <span style=\"color:magenta\">linear probability: 26.06%</span></p><p style=\"color:cyan\">Predicted next token: color, <span style=\"color:darkorange\">logprobs: -1.3887696, <span style=\"color:magenta\">linear probability: 24.94%</span></p><br><p>Sentence: My least favorite TV</p><p style=\"color:cyan\">Predicted next token: show, <span style=\"color:darkorange\">logprobs: -0.0007898556, <span style=\"color:magenta\">linear probability: 99.92%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -7.711523, <span style=\"color:magenta\">linear probability: 0.04%</span></p><p style=\"color:cyan\">Predicted next token: series, <span style=\"color:darkorange\">logprobs: -9.348547, <span style=\"color:magenta\">linear probability: 0.01%</span></p><br><p>Sentence: My least favorite TV show</p><p style=\"color:cyan\">Predicted next token: is, <span style=\"color:darkorange\">logprobs: -0.2851253, <span style=\"color:magenta\">linear probability: 75.19%</span></p><p style=\"color:cyan\">Predicted next token: of, <span style=\"color:darkorange\">logprobs: -1.55335, <span style=\"color:magenta\">linear probability: 21.15%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -3.4928775, <span style=\"color:magenta\">linear probability: 3.04%</span></p><br><p>Sentence: My least favorite TV show is</p><p style=\"color:cyan\">Predicted next token: \"My, <span style=\"color:darkorange\">logprobs: -0.69349754, <span style=\"color:magenta\">linear probability: 49.98%</span></p><p style=\"color:cyan\">Predicted next token: \"The, <span style=\"color:darkorange\">logprobs: -1.2899293, <span style=\"color:magenta\">linear probability: 27.53%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -2.4170141, <span style=\"color:magenta\">linear probability: 8.92%</span></p><br><p>Sentence: My least favorite TV show is Breaking Bad</p><p style=\"color:cyan\">Predicted next token: because, <span style=\"color:darkorange\">logprobs: -0.17786823, <span style=\"color:magenta\">linear probability: 83.71%</span></p><p style=\"color:cyan\">Predicted next token: ,, <span style=\"color:darkorange\">logprobs: -2.3946173, <span style=\"color:magenta\">linear probability: 9.12%</span></p><p style=\"color:cyan\">Predicted next token: ., <span style=\"color:darkorange\">logprobs: -3.1861975, <span style=\"color:magenta\">linear probability: 4.13%</span></p><br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"high_prob_completions = {}\n",
"low_prob_completions = {}\n",
"html_output = \"\"\n",
"\n",
"for sentence in sentence_list:\n",
" PROMPT = \"\"\"Complete this sentence. You are acting as auto-complete. Simply complete the sentence to the best of your ability, make sure it is just ONE sentence: {sentence}\"\"\"\n",
@ -509,19 +509,19 @@
" logprobs=True,\n",
" top_logprobs=3,\n",
" )\n",
" print(Fore.CYAN + \"Sentence:\" + Fore.RESET, sentence)\n",
" html_output += f'<p>Sentence: {sentence}</p>'\n",
" first_token = True\n",
" for token in API_RESPONSE.choices[0].logprobs.content[0].top_logprobs:\n",
" print(\n",
" Fore.CYAN + \"Predicted next token:\" + Fore.RESET + f\" {token.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {token.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(token.logprob)*100,2)}%\"\n",
" )\n",
" html_output += f'<p style=\"color:cyan\">Predicted next token: {token.token}, <span style=\"color:darkorange\">logprobs: {token.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(token.logprob)*100,2)}%</span></p>'\n",
" if first_token:\n",
" if np.exp(token.logprob) > 0.95:\n",
" high_prob_completions[sentence] = token.token\n",
" if np.exp(token.logprob) < 0.60:\n",
" low_prob_completions[sentence] = token.token\n",
" first_token = False\n",
" print(\"\\n\")\n"
" html_output += \"<br>\"\n",
"\n",
"display(HTML(html_output))"
]
},
{
@ -533,7 +533,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 275,
"metadata": {},
"outputs": [
{
@ -542,7 +542,7 @@
"{'My least': 'favorite', 'My least favorite TV': 'show'}"
]
},
"execution_count": 18,
"execution_count": 275,
"metadata": {},
"output_type": "execute_result"
}
@ -560,7 +560,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 276,
"metadata": {},
"outputs": [
{
@ -569,7 +569,7 @@
"{'My least favorite': 'food', 'My least favorite TV show is': '\"My'}"
]
},
"execution_count": 19,
"execution_count": 276,
"metadata": {},
"output_type": "execute_result"
}
@ -602,7 +602,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 277,
"metadata": {},
"outputs": [],
"source": [
@ -612,39 +612,52 @@
" [{\"role\": \"user\", \"content\": PROMPT}], model=\"gpt-4\", logprobs=True, top_logprobs=5\n",
")\n",
"\n",
"\n",
"def highlight_text(api_response):\n",
" colors = [\n",
" Fore.MAGENTA,\n",
" Fore.GREEN,\n",
" Fore.YELLOW,\n",
" Fore.RED,\n",
" Fore.BLUE,\n",
" \"#FF00FF\", # Magenta\n",
" \"#008000\", # Green\n",
" \"#FF8C00\", # Dark Orange\n",
" \"#FF0000\", # Red\n",
" \"#0000FF\", # Blue\n",
" ]\n",
" reset_color = Fore.RESET\n",
" tokens = api_response.choices[0].logprobs.content\n",
"\n",
" color_idx = 0 # Initialize color index\n",
" html_output = \"\" # Initialize HTML output\n",
" for t in tokens:\n",
" token_str = bytes(t.bytes).decode(\"utf-8\") # Decode bytes to string\n",
"\n",
" print(f\"{colors[color_idx]}{token_str}{reset_color}\", end=\"\")\n",
" # Add colored token to HTML output\n",
" html_output += f\"<span style='color: {colors[color_idx]}'>{token_str}</span>\"\n",
"\n",
" # Move to the next color\n",
" color_idx = (color_idx + 1) % len(colors)\n",
" print()\n",
" print(f\"Total number of tokens: {len(tokens)}\")\n"
" display(HTML(html_output)) # Display HTML output\n",
" print(f\"Total number of tokens: {len(tokens)}\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 278,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<span style='color: #FF00FF'>The</span><span style='color: #008000'> longest</span><span style='color: #FF8C00'> word</span><span style='color: #FF0000'> in</span><span style='color: #0000FF'> the</span><span style='color: #FF00FF'> English</span><span style='color: #008000'> language</span><span style='color: #FF8C00'>,</span><span style='color: #FF0000'> according</span><span style='color: #0000FF'> to</span><span style='color: #FF00FF'> the</span><span style='color: #008000'> Guinness</span><span style='color: #FF8C00'> World</span><span style='color: #FF0000'> Records</span><span style='color: #0000FF'>,</span><span style='color: #FF00FF'> is</span><span style='color: #008000'> '</span><span style='color: #FF8C00'>p</span><span style='color: #FF0000'>ne</span><span style='color: #0000FF'>um</span><span style='color: #FF00FF'>on</span><span style='color: #008000'>oul</span><span style='color: #FF8C00'>tram</span><span style='color: #FF0000'>icro</span><span style='color: #0000FF'>sc</span><span style='color: #FF00FF'>op</span><span style='color: #008000'>ics</span><span style='color: #FF8C00'>il</span><span style='color: #FF0000'>ic</span><span style='color: #0000FF'>ov</span><span style='color: #FF00FF'>ol</span><span style='color: #008000'>cano</span><span style='color: #FF8C00'>con</span><span style='color: #FF0000'>iosis</span><span style='color: #0000FF'>'.</span><span style='color: #FF00FF'> It</span><span style='color: #008000'> is</span><span style='color: #FF8C00'> a</span><span style='color: #FF0000'> type</span><span style='color: #0000FF'> of</span><span style='color: #FF00FF'> lung</span><span style='color: #008000'> disease</span><span style='color: #FF8C00'> caused</span><span style='color: #FF0000'> by</span><span style='color: #0000FF'> inh</span><span style='color: #FF00FF'>aling</span><span style='color: #008000'> ash</span><span style='color: #FF8C00'> and</span><span style='color: #FF0000'> sand</span><span style='color: #0000FF'> dust</span><span style='color: #FF00FF'>.</span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[35mThe\u001b[39m\u001b[32m longest\u001b[39m\u001b[33m word\u001b[39m\u001b[31m in\u001b[39m\u001b[34m the\u001b[39m\u001b[35m English\u001b[39m\u001b[32m language\u001b[39m\u001b[33m,\u001b[39m\u001b[31m according\u001b[39m\u001b[34m to\u001b[39m\u001b[35m the\u001b[39m\u001b[32m Guinness\u001b[39m\u001b[33m World\u001b[39m\u001b[31m Records\u001b[39m\u001b[34m,\u001b[39m\u001b[35m is\u001b[39m\u001b[32m '\u001b[39m\u001b[33mp\u001b[39m\u001b[31mne\u001b[39m\u001b[34mum\u001b[39m\u001b[35mon\u001b[39m\u001b[32moul\u001b[39m\u001b[33mtram\u001b[39m\u001b[31micro\u001b[39m\u001b[34msc\u001b[39m\u001b[35mop\u001b[39m\u001b[32mics\u001b[39m\u001b[33mil\u001b[39m\u001b[31mic\u001b[39m\u001b[34mov\u001b[39m\u001b[35mol\u001b[39m\u001b[32mcano\u001b[39m\u001b[33mcon\u001b[39m\u001b[31miosis\u001b[39m\u001b[34m'.\u001b[39m\u001b[35m It\u001b[39m\u001b[32m is\u001b[39m\u001b[33m a\u001b[39m\u001b[31m type\u001b[39m\u001b[34m of\u001b[39m\u001b[35m lung\u001b[39m\u001b[32m disease\u001b[39m\u001b[33m caused\u001b[39m\u001b[31m by\u001b[39m\u001b[34m inh\u001b[39m\u001b[35maling\u001b[39m\u001b[32m ash\u001b[39m\u001b[33m and\u001b[39m\u001b[31m sand\u001b[39m\u001b[34m dust\u001b[39m\u001b[35m.\u001b[39m\n",
"Total number of tokens: 51\n"
]
}
@ -662,7 +675,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 279,
"metadata": {},
"outputs": [
{
@ -680,23 +693,23 @@
"Bytes: [153] \n",
"\n",
"Token: -\n",
"Log prob: -0.011257432\n",
"Linear prob: 98.88 %\n",
"Log prob: -0.0096905725\n",
"Linear prob: 99.04 %\n",
"Bytes: [32, 45] \n",
"\n",
"Token: Blue\n",
"Log prob: -0.0004397287\n",
"Log prob: -0.00042042506\n",
"Linear prob: 99.96 %\n",
"Bytes: [32, 66, 108, 117, 101] \n",
"\n",
"Token: Heart\n",
"Log prob: -7.1954215e-05\n",
"Log prob: -7.302705e-05\n",
"Linear prob: 99.99 %\n",
"Bytes: [32, 72, 101, 97, 114, 116] \n",
"\n",
"Bytes array: [240, 159, 146, 153, 32, 45, 32, 66, 108, 117, 101, 32, 72, 101, 97, 114, 116]\n",
"Decoded bytes: 💙 - Blue Heart\n",
"Joint prob: 98.8 %\n"
"Joint prob: 98.96 %\n"
]
}
],
@ -727,7 +740,7 @@
"# Print the results\n",
"print(\"Bytes array:\", aggregated_bytes)\n",
"print(f\"Decoded bytes: {aggregated_text}\")\n",
"print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")\n"
"print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")"
]
},
{

Loading…
Cancel
Save