From 0c7bc2a3d57bde397ccf75b769dc5582b6ace345 Mon Sep 17 00:00:00 2001
From: Shashank Rajput <144760128+ShashankMosaicML@users.noreply.github.com>
Date: Wed, 8 May 2024 17:10:20 -0700
Subject: [PATCH] checking if attention mask present for ignoring pad tokens
 in ffn (#1188)

---
 llmfoundry/models/layers/blocks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llmfoundry/models/layers/blocks.py b/llmfoundry/models/layers/blocks.py
index 3ff65fd8b3..7e62d9e355 100644
--- a/llmfoundry/models/layers/blocks.py
+++ b/llmfoundry/models/layers/blocks.py
@@ -221,11 +221,11 @@ def apply_ffn(
         """
         batch_size, seq_len = m.size()[:2]
         indices = None
-        if not self.use_pad_tok_in_ffn:
+        if not self.use_pad_tok_in_ffn and attention_mask is not None:
             assert unpad_input is not None
             m, indices, _, _ = unpad_input(m, attention_mask)
         n = self.ffn(m)
-        if not self.use_pad_tok_in_ffn:
+        if not self.use_pad_tok_in_ffn and attention_mask is not None:
             assert pad_input is not None
             n = pad_input(n, indices, batch_size, seq_len)
         return n
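
The sketch below illustrates the unpad -> FFN -> repad pattern that this guard
protects; it is a minimal, self-contained approximation, not the actual
llmfoundry implementation. `_unpad_input` and `_pad_input` are hypothetical,
simplified stand-ins for the flash-attn `unpad_input`/`pad_input` utilities
assumed by `blocks.py`, and `apply_ffn_sketch` is a free-function version of
the patched method. The behavior it demonstrates is what the added
`attention_mask is not None` check guards against: when no attention mask is
supplied (e.g. a batch with no padding), the unpad/repad steps are skipped and
the FFN runs on the full tensor.

from typing import Optional

import torch
import torch.nn as nn


def _unpad_input(hidden: torch.Tensor, attention_mask: torch.Tensor):
    # Simplified stand-in for flash-attn's unpad_input: keep only the non-pad
    # positions and remember their flat indices so they can be restored later.
    batch, seqlen, _ = hidden.shape
    indices = torch.nonzero(attention_mask.bool().reshape(-1)).flatten()
    return hidden.reshape(batch * seqlen, -1)[indices], indices


def _pad_input(flat: torch.Tensor, indices: torch.Tensor, batch: int,
               seqlen: int) -> torch.Tensor:
    # Simplified stand-in for flash-attn's pad_input: scatter the unpadded
    # tokens back into a zero-filled (batch, seqlen, dim) tensor.
    out = flat.new_zeros(batch * seqlen, flat.shape[-1])
    out[indices] = flat
    return out.reshape(batch, seqlen, -1)


def apply_ffn_sketch(
    ffn: nn.Module,
    m: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    use_pad_tok_in_ffn: bool,
) -> torch.Tensor:
    # Mirrors the guarded unpad -> FFN -> repad flow after the patch: pad
    # tokens are stripped only when an attention mask is actually available.
    batch_size, seq_len = m.size()[:2]
    indices = None
    if not use_pad_tok_in_ffn and attention_mask is not None:
        m, indices = _unpad_input(m, attention_mask)
    n = ffn(m)
    if not use_pad_tok_in_ffn and attention_mask is not None:
        n = _pad_input(n, indices, batch_size, seq_len)
    return n


if __name__ == '__main__':
    ffn = nn.Linear(8, 8)
    x = torch.randn(2, 4, 8)
    mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])

    # With a mask, pad positions are dropped before the FFN and zero-filled
    # back into place afterwards.
    print(apply_ffn_sketch(ffn, x, mask, use_pad_tok_in_ffn=False).shape)
    # With attention_mask=None, the guard added by this patch keeps the FFN
    # running on the full padded tensor instead of failing in the unpad step.
    print(apply_ffn_sketch(ffn, x, None, use_pad_tok_in_ffn=False).shape)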