fix routing to tiktoken encoder (#866)

This commit is contained in:
Harrison Chase 2023-02-02 22:08:14 -08:00 committed by GitHub
parent e9ef08862d
commit 777aaff841
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -277,8 +277,13 @@ class BaseOpenAI(BaseLLM, BaseModel):
"This is needed in order to calculate get_num_tokens. "
"Please it install it with `pip install tiktoken`."
)
encoder = "gpt2"
if self.model_name in ("text-davinci-003", "text-davinci-002"):
encoder = "p50k_base"
if self.model_name.startswith("code"):
encoder = "p50k_base"
# create a GPT-3 encoder instance
enc = tiktoken.get_encoding("gpt2")
enc = tiktoken.get_encoding(encoder)
# encode the text using the GPT-3 encoder
tokenized_text = enc.encode(text)