Rephrasing comment for clarity

2024-11-02 09:40:42 +00:00 · 2023-04-06 20:20:18 -04:00 · 2023-04-06 20:20:18 -04:00 · 43ddc3eefa
commit 43ddc3eefa
parent 0689c2e974
1 changed file with 1 addition and 1 deletion
--- a/data.py
+++ b/data.py
@@ -31,7 +31,7 @@ def tokenize_inputs(config, tokenizer, examples):
        # add target tokens, remove bos
        input_ids[i, newline_plus_inputs: newline_plus_inputs + len(target_tokens)] = target_tokens
-        # add eos token, enforce stopping if we don't truncate
+        # add eos token; ensure generation stops if inputs aren't truncated
        # we don't want long code to stop generating if truncated during training
        if newline_plus_inputs + len(target_tokens) < max_length:
            input_ids[i, newline_plus_inputs + len(target_tokens)] = tokenizer.eos_token_id