Commit

fix fp8
Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
manman-ren committed Dec 5, 2024
1 parent 2867e2f commit 02ef2c2
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions tritonbench/kernels/triton_fused_attention.py
@@ -458,10 +458,10 @@ def _attn_fwd_inner_ws(
             num_warps=w,
         )
     )
-    for BM in [128] # 64, 128]
-    for BN in [128] # 64, 128]
-    for s in [3] # 3, 4, 7]
-    for w in [8] # 4, 8]
+    for BM in [64, 128]
+    for BN in [64, 128]
+    for s in [3, 4, 7]
+    for w in [4, 8]
 ]
 # TMA, WS, and CompPipe
 configsTmaWS = [
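The hunk above re-enables the full autotuning sweep over block sizes, pipeline stages, and warp counts instead of the single pinned configuration. A minimal sketch of how such a sweep is typically wired up with Triton's autotuner is shown below; the kernel body, meta-parameter names, and tuning key are illustrative assumptions, not the repository's actual fused-attention kernel.

import triton
import triton.language as tl

# Sketch only: build one triton.Config per combination of block sizes,
# pipeline stages, and warp count, and let @triton.autotune pick the
# fastest combination for a given problem size.
configs = [
    triton.Config(
        {"BLOCK_M": BM, "BLOCK_N": BN},  # illustrative meta-parameters
        num_stages=s,
        num_warps=w,
    )
    for BM in [64, 128]
    for BN in [64, 128]
    for s in [3, 4, 7]
    for w in [4, 8]
]

@triton.autotune(configs=configs, key=["N_CTX"])  # re-tune when N_CTX changes
@triton.jit
def _toy_kernel(x_ptr, y_ptr, N_CTX, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr):
    # Placeholder body; the real kernels live in triton_fused_attention.py.
    pid = tl.program_id(0)
    offs = pid * BLOCK_M + tl.arange(0, BLOCK_M)
    mask = offs < N_CTX
    tl.store(y_ptr + offs, tl.load(x_ptr + offs, mask=mask), mask=mask)
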
2 changes: 1 addition & 1 deletion tritonbench/operators/fp8_attention/operator.py
@@ -110,7 +110,7 @@ def triton_flash_v2(
         triton_q, triton_k, triton_v = self.triton_preprocess(q, k, v)
         # full fp8 will be enabled if type of q,k,v is fp8
         return lambda: triton_attention(
-            triton_q, triton_k, triton_v, False, self.sm_scale, "base"
+            triton_q, triton_k, triton_v, False, self.sm_scale, "base", "base"
         )
 
     def get_x_val(self, _example_inputs) -> Tuple[int, int, int, int]:
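The one-line fix above passes a second "base" variant string to triton_attention, presumably to match an extra positional parameter added to the attention entry point. A minimal, hypothetical sketch of the call-site pattern follows; the helper name, parameter names, and the meaning of the two variant strings are assumptions inferred from the diff, not a documented API.

from typing import Any, Callable

def make_flash_v2_callable(
    triton_attention: Callable[..., Any],
    triton_q: Any,
    triton_k: Any,
    triton_v: Any,
    sm_scale: float,
) -> Callable[[], Any]:
    # Hypothetical helper mirroring triton_flash_v2: defer execution behind a
    # zero-argument lambda so the benchmark harness times only the attention call.
    causal = False
    # After this commit, two variant strings are passed; their exact semantics
    # are defined by triton_fused_attention.py and only assumed here.
    return lambda: triton_attention(
        triton_q, triton_k, triton_v, causal, sm_scale, "base", "base"
    )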
