diff --git a/.gitignore b/.gitignore index 97df0437f..6bddc9d2f 100644 --- a/.gitignore +++ b/.gitignore @@ -26,9 +26,9 @@ var/ # Dev venv -scripts +# Other .eggs - .vscode core +scripts log* \ No newline at end of file diff --git a/flash_attn/flash_attn_interface.py b/flash_attn/flash_attn_interface.py index 28153218d..24a317f9e 100644 --- a/flash_attn/flash_attn_interface.py +++ b/flash_attn/flash_attn_interface.py @@ -788,12 +788,6 @@ def flash_attn_func( deterministic=False, return_attn_probs=False, ): - print() - print("flash_attn_func") - print("q:", q.shape) - print("k:", k.shape) - print("v:", v.shape) - print("dropout_p:", dropout_p) """dropout_p should be set to 0.0 during evaluation Supports multi-query and grouped-query attention (MQA/GQA) by passing in KV with fewer heads than Q. Note that the number of heads in Q must be divisible by the number of heads in KV.