From 777aaff84167e92dd1c77e722eec0938b76f95e5 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Thu, 2 Feb 2023 22:08:14 -0800 Subject: [PATCH] fix routing to tiktoken encoder (#866) --- langchain/llms/openai.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index 554d5ce4..6d00fde5 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -277,8 +277,13 @@ class BaseOpenAI(BaseLLM, BaseModel): "This is needed in order to calculate get_num_tokens. " "Please it install it with `pip install tiktoken`." ) + encoder = "gpt2" + if self.model_name in ("text-davinci-003", "text-davinci-002"): + encoder = "p50k_base" + if self.model_name.startswith("code"): + encoder = "p50k_base" # create a GPT-3 encoder instance - enc = tiktoken.get_encoding("gpt2") + enc = tiktoken.get_encoding(encoder) # encode the text using the GPT-3 encoder tokenized_text = enc.encode(text)