From 43ddc3eefa761a542b965e3dff443b0de1a5de88 Mon Sep 17 00:00:00 2001 From: MalikMAlna Date: Thu, 6 Apr 2023 20:20:18 -0400 Subject: [PATCH] Rephrasing comment for clarity --- data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.py b/data.py index e5a7fb14..a83ed3d6 100644 --- a/data.py +++ b/data.py @@ -31,7 +31,7 @@ def tokenize_inputs(config, tokenizer, examples): # add target tokens, remove bos input_ids[i, newline_plus_inputs: newline_plus_inputs + len(target_tokens)] = target_tokens - # add eos token, enforce stopping if we don't truncate + # add eos token; ensure generation stops if inputs aren't truncated # we don't want long code to stop generating if truncated during training if newline_plus_inputs + len(target_tokens) < max_length: input_ids[i, newline_plus_inputs + len(target_tokens)] = tokenizer.eos_token_id