DocsGPT/application/utils.py

6 lines
206 B
Python

from transformers import GPT2TokenizerFast
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
tokenizer.model_max_length = 100000
def count_tokens(string):
return len(tokenizer(string)['input_ids'])