From 3c53f34e01fe6b2dd92c22bb4d213526bed790d7 Mon Sep 17 00:00:00 2001 From: dmitry-brazhenko Date: Tue, 28 Mar 2023 17:31:14 +0200 Subject: [PATCH] added sharptoken as example --- .../How_to_count_tokens_with_tiktoken.ipynb | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/examples/How_to_count_tokens_with_tiktoken.ipynb b/examples/How_to_count_tokens_with_tiktoken.ipynb index 83d20314..3ef07ff4 100644 --- a/examples/How_to_count_tokens_with_tiktoken.ipynb +++ b/examples/How_to_count_tokens_with_tiktoken.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -35,8 +34,9 @@ "\n", "## Tokenizer libraries by language\n", "\n", - "For `cl100k_base` and `p50k_base` encodings, `tiktoken` is the only tokenizer available as of March 2023.\n", + "For `cl100k_base` and `p50k_base` encodings:\n", "- Python: [tiktoken](https://github.com/openai/tiktoken/blob/main/README.md)\n", + "- .NET / C#: [SharpToken](https://github.com/dmitry-brazhenko/SharpToken)\n", "\n", "For `r50k_base` (`gpt2`) encodings, tokenizers are available in many languages.\n", "- Python: [tiktoken](https://github.com/openai/tiktoken/blob/main/README.md) (or alternatively [GPT2TokenizerFast](https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2TokenizerFast))\n", @@ -54,7 +54,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -88,7 +87,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -105,7 +103,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -126,7 +123,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -143,7 +139,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -152,7 +147,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -180,7 +174,6 @@ ] }, { - 
"attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -221,7 +214,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,7 +221,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -257,7 +248,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -265,7 +255,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -293,7 +282,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -301,7 +289,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -424,7 +411,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -549,7 +535,7 @@ ], "metadata": { "kernelspec": { - "display_name": "openai", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -563,9 +549,8 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.9" + "version": "3.7.3" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"