mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
Rephrase comment for clarity
This commit is contained in:
parent
17fb6f668a
commit
1195d09fba
2
data.py
2
data.py
@@ -31,7 +31,7 @@ def tokenize_inputs(config, tokenizer, examples):
|
|||||||
|
|
||||||
# add target tokens, remove bos
|
# add target tokens, remove bos
|
||||||
input_ids[i, newline_plus_inputs: newline_plus_inputs + len(target_tokens)] = target_tokens
|
input_ids[i, newline_plus_inputs: newline_plus_inputs + len(target_tokens)] = target_tokens
|
||||||
# add eos token, enforce stopping if we don't truncate
|
# add eos token; ensure generation stops if inputs aren't truncated
|
||||||
# we don't want long code to stop generating if truncated during training
|
# we don't want long code to stop generating if truncated during training
|
||||||
if newline_plus_inputs + len(target_tokens) < max_length:
|
if newline_plus_inputs + len(target_tokens) < max_length:
|
||||||
input_ids[i, newline_plus_inputs + len(target_tokens)] = tokenizer.eos_token_id
|
input_ids[i, newline_plus_inputs + len(target_tokens)] = tokenizer.eos_token_id
|
||||||
|
Loading…
Reference in New Issue
Block a user