From 1195d09fba7dd51c14e15b7dfea6227ca75739e9 Mon Sep 17 00:00:00 2001
From: MalikMAlna
Date: Thu, 6 Apr 2023 20:20:18 -0400
Subject: [PATCH] Rephrasing comment for clarity

---
 data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data.py b/data.py
index e5a7fb14..a83ed3d6 100644
--- a/data.py
+++ b/data.py
@@ -31,7 +31,7 @@ def tokenize_inputs(config, tokenizer, examples):
         # add target tokens, remove bos
         input_ids[i, newline_plus_inputs: newline_plus_inputs + len(target_tokens)] = target_tokens
 
-        # add eos token, enforce stopping if we don't truncate
+        # add eos token; ensure generation stops if inputs aren't truncated
         # we don't want long code to stop generating if truncated during training
         if newline_plus_inputs + len(target_tokens) < max_length:
             input_ids[i, newline_plus_inputs + len(target_tokens)] = tokenizer.eos_token_id