Convert output back to q.dtype
wonjoolee95 committed Apr 25, 2024
1 parent 5778694 commit 961dfff
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion torch_xla/experimental/custom_kernel.py
@@ -414,7 +414,7 @@ def paged_attention(q, k_pages, v_pages, lengths, page_indices,
     ], payload, [q.shape, output_shape, output_shape],
     [q_output_dtype, torch.float32, torch.float32])

-  return output.reshape(batch_size, num_heads, head_dim)
+  return output.reshape(batch_size, num_heads, head_dim).to(q.dtype)


 def non_xla_attetion(q, k, v, attention_type):
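The change can be illustrated with a small sketch. Attention kernels often accumulate their output in float32 for numerical stability, and this commit casts that result back to the query's dtype before returning it. The helper name `cast_like_query` below is hypothetical, not part of torch_xla; it only mirrors the added `.to(q.dtype)` call.

```python
import torch

def cast_like_query(output_f32, q):
    # Hypothetical helper: the kernel's float32 accumulation result is
    # cast back to the query's dtype, so the caller receives the same
    # dtype it passed in (as paged_attention now does with .to(q.dtype)).
    return output_f32.to(q.dtype)

q = torch.ones(2, 8, dtype=torch.bfloat16)        # bf16 query
out_f32 = torch.randn(2, 8, dtype=torch.float32)  # float32 kernel output
out = cast_like_query(out_f32, q)
print(out.dtype)  # torch.bfloat16
```

Without the cast, a caller passing bfloat16 inputs would silently receive a float32 tensor, which can break dtype expectations downstream.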
