From 902bf6400a990a1cac729ad7b37d6750e2567155 Mon Sep 17 00:00:00 2001
From: justheuristic
Date: Sun, 12 Jun 2022 06:50:19 +0300
Subject: [PATCH] [temp workaround] create alibi

---
 src/block.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/block.py b/src/block.py
index 16af49a..30f22d8 100644
--- a/src/block.py
+++ b/src/block.py
@@ -15,7 +15,7 @@ from src.ops import (
     attention_mask_func,
     dropout_add,
     pre_process_alibi_for_pad,
-    split_tensor_along_last_dim,
+    split_tensor_along_last_dim, build_alibi_tensor,
 )
 
 
@@ -73,6 +73,8 @@ class BloomAttention(nn.Module):
         use_cache=False,
         output_attentions=False,
     ):
+        if alibi is None:
+            alibi = build_alibi_tensor(hidden_states.shape[1], n_head=self.num_heads, dtype=hidden_states.dtype)
         # hidden_states: [batch_size, seq_length, hidden_size]
         # repeat alibi tensor with the batch size
         alibi = alibi.repeat(hidden_states.shape[0], 1, 1).to(hidden_states.device)
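
Note: for readers unfamiliar with the helper imported above, here is a minimal sketch of what an ALiBi-building function such as build_alibi_tensor typically computes (per Press et al.'s ALiBi scheme). The name build_alibi_tensor_sketch, the [n_head, 1, seq_length] output shape, and the power-of-two head-count assumption are illustrative assumptions, not taken from src/ops.py:

    import torch

    def build_alibi_tensor_sketch(seq_length: int, n_head: int,
                                  dtype=torch.float32) -> torch.Tensor:
        # Per-head slopes: a geometric sequence 2^(-8/n), 2^(-16/n), ...
        # (assumes n_head is a power of two; the full ALiBi recipe
        # interpolates extra slopes otherwise).
        slopes = torch.tensor(
            [2 ** (-8 * (h + 1) / n_head) for h in range(n_head)], dtype=dtype
        )
        # Linear positional offsets 0, 1, ..., seq_length - 1.
        positions = torch.arange(seq_length, dtype=dtype)
        # Broadcast to shape [n_head, 1, seq_length], ready to be repeated
        # over the batch dimension as the second hunk above does.
        return slopes[:, None, None] * positions[None, None, :]

    # Usage: alibi for 8 heads over 16 positions -> shape [8, 1, 16]
    alibi = build_alibi_tensor_sketch(seq_length=16, n_head=8)

With such a helper in place, the fallback in the patched forward pass can construct the bias on the fly whenever the caller passes alibi=None, instead of requiring every caller to precompute it.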