Mirror of https://github.com/arc53/DocsGPT, synced 2024-11-17 21:26:26 +00:00.
6 lines · 206 B · Python
from transformers import GPT2TokenizerFast
# Load the GPT-2 BPE tokenizer once at module import time; every call to
# count_tokens() reuses this single shared instance.
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')

# Raise the length ceiling well above GPT-2's native 1024 so that tokenizing
# long inputs neither truncates nor emits a max-length warning — this module
# only counts tokens, it never feeds them to a model.
tokenizer.model_max_length = 100000
def count_tokens(string):
    """Return how many GPT-2 tokens *string* encodes to.

    Tokenizes with the module-level ``tokenizer`` and counts the
    resulting input IDs.
    """
    encoding = tokenizer(string)
    return len(encoding['input_ids'])