Commit

ShashankMosaicML committed Jan 17, 2024
1 parent 03113a9 commit 3351d23
Showing 1 changed file with 12 additions and 2 deletions.
tests/models/layers/test_flash_triton_torch.py (14 changes: 12 additions & 2 deletions)
@@ -327,11 +327,16 @@ def gen_tca_mask():
     x1.requires_grad = True

     with torch.autocast(x0.device.type):
+        flash_attn_padding_info = None
+        if attn_impl == 'flash':
+            flash_attn_padding_info = gen_flash_attn_padding_info(
+                n, s, 0, torch.device(device), None, attention_mask)
         y0, _, _ = mmhsa(x0,
                          past_key_value=None,
                          attn_bias=None,
                          attention_mask=attention_mask,
-                         is_causal=True)
+                         is_causal=True,
+                         flash_attn_padding_info=flash_attn_padding_info)
         y1, _ = tmhsa(x1,
                       x1,
                       x1,
@@ -401,11 +406,16 @@ def test_grouped_attention_heads(attn_impl: str,
     x0.requires_grad = True

     with torch.autocast(x0.device.type):
+        flash_attn_padding_info = None
+        if attn_impl == 'flash':
+            flash_attn_padding_info = gen_flash_attn_padding_info(
+                n, s, 0, torch.device(device), None, attention_mask)
         y0, _, _ = mmhsa(x0,
                          past_key_value=None,
                          attn_bias=None,
                          attention_mask=attention_mask,
-                         is_causal=True)
+                         is_causal=True,
+                         flash_attn_padding_info=flash_attn_padding_info)
         y0 *= attention_mask.unsqueeze(-1)

         loss0 = y0.sum()
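
For context, the pattern this commit introduces is sketched below: the tests build flash_attn_padding_info only when the flash implementation is under test, and pass it through to the attention module's forward call, while the other implementations keep receiving None. This is an illustrative sketch, not part of the commit; the argument comments are inferred from the variable names in the diff (n and s appear to be the batch size and sequence length used elsewhere in these tests), and the import path for gen_flash_attn_padding_info is an assumption to be checked against the repository.

# Illustrative sketch only, not part of the commit. The import path and the
# meaning of the positional arguments are assumptions inferred from the diff.
import torch

from llmfoundry.models.mpt.modeling_mpt import gen_flash_attn_padding_info  # assumed path


def forward_with_padding_info(mmhsa, x0, attention_mask, attn_impl, n, s, device):
    # Only the 'flash' implementation needs the unpadding metadata; the
    # torch/triton paths receive None, matching their previous behavior.
    flash_attn_padding_info = None
    if attn_impl == 'flash':
        flash_attn_padding_info = gen_flash_attn_padding_info(
            n,                     # batch size (assumed from test naming)
            s,                     # sequence length (assumed from test naming)
            0,                     # past key/value length: no KV cache in these tests
            torch.device(device),  # device the padding metadata is created on
            None,                  # attention_mask_in_length: not used here
            attention_mask)        # padding mask for the batch
    # mmhsa is the multihead attention module under test; in these tests its
    # forward returns (output, attn_weights, past_key_value).
    return mmhsa(x0,
                 past_key_value=None,
                 attn_bias=None,
                 attention_mask=attention_mask,
                 is_causal=True,
                 flash_attn_padding_info=flash_attn_padding_info)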
