|
|
@ -277,8 +277,13 @@ class BaseOpenAI(BaseLLM, BaseModel):
|
|
|
|
"This is needed in order to calculate get_num_tokens. "
|
|
|
|
"This is needed in order to calculate get_num_tokens. "
|
|
|
|
"Please it install it with `pip install tiktoken`."
|
|
|
|
"Please it install it with `pip install tiktoken`."
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
encoder = "gpt2"
|
|
|
|
|
|
|
|
if self.model_name in ("text-davinci-003", "text-davinci-002"):
|
|
|
|
|
|
|
|
encoder = "p50k_base"
|
|
|
|
|
|
|
|
if self.model_name.startswith("code"):
|
|
|
|
|
|
|
|
encoder = "p50k_base"
|
|
|
|
# create a GPT-3 encoder instance
|
|
|
|
# create a GPT-3 encoder instance
|
|
|
|
enc = tiktoken.get_encoding("gpt2")
|
|
|
|
enc = tiktoken.get_encoding(encoder)
|
|
|
|
|
|
|
|
|
|
|
|
# encode the text using the GPT-3 encoder
|
|
|
|
# encode the text using the GPT-3 encoder
|
|
|
|
tokenized_text = enc.encode(text)
|
|
|
|
tokenized_text = enc.encode(text)
|
|
|
|