issue#4082 base_language had wrong code comment that it was using gpt-3 to tokenize text instead of gpt-2 (#4084)
Co-authored-by: Pulkit <pulkit.mehta@catylex.com>
commit bbbca10704
parent 6caba8e759
@@ -25,7 +25,7 @@ def _get_num_tokens_default_method(text: str) -> int:
     # create a GPT-2 tokenizer instance
     tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
-    # tokenize the text using the GPT-3 tokenizer
+    # tokenize the text using the GPT-2 tokenizer
     tokenized_text = tokenizer.tokenize(text)
 
     # calculate the number of tokens in the tokenized text
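For context, here is a minimal runnable sketch of the helper this hunk touches, assuming the transformers package is installed. The diff only shows the lines around the corrected comment; the final return statement is inferred from the last comment and is not part of the patch itself.

# Sketch of _get_num_tokens_default_method, assuming transformers is
# available; the return line is inferred from context, not shown above.
from transformers import GPT2TokenizerFast


def _get_num_tokens_default_method(text: str) -> int:
    # create a GPT-2 tokenizer instance
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

    # tokenize the text using the GPT-2 tokenizer
    tokenized_text = tokenizer.tokenize(text)

    # calculate the number of tokens in the tokenized text
    return len(tokenized_text)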