
Commit

Add padding_mask fix
irenedea committed Oct 30, 2023
1 parent 9027f49 commit 090dc02
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions llmfoundry/models/layers/llama_attention_monkeypatch.py
@@ -78,6 +78,8 @@ def llama_attention_patch_torch(
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
+ # Temporary fix for llama2 transformers compatibility, padding_mask will be removed in the next transformers release >4.32.1.
+ padding_mask: Optional[torch.LongTensor] = None,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if use_cache:
raise NotImplementedError(
@@ -186,6 +188,8 @@ def llama_attention_patch_triton(
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
+ # Temporary fix for llama2 transformers compatibility, padding_mask will be removed in the next transformers release >4.32.1.
+ padding_mask: Optional[torch.LongTensor] = None,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if use_cache:
raise NotImplementedError(

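For context, llama_attention_patch_torch and llama_attention_patch_triton act as drop-in replacements for LlamaAttention.forward, so their signatures must accept every keyword argument the surrounding transformers code passes to an attention layer. The sketch below is a hypothetical illustration of how such a patch can be installed and why the extra padding_mask parameter matters; the reassignment shown is an assumption for illustration, not necessarily how llm-foundry wires the patch up.

# Hypothetical sketch (assumption, not llm-foundry's actual install code):
# reassign LlamaAttention.forward so every attention layer routes through
# the patched implementation.
import transformers.models.llama.modeling_llama as llama_modeling

from llmfoundry.models.layers.llama_attention_monkeypatch import (
    llama_attention_patch_torch,
)

# Because the patched function now accepts padding_mask, transformers
# versions that still pass that keyword (the situation the in-code comment
# above refers to) no longer fail with a TypeError about an unexpected
# keyword argument; releases that drop the argument simply leave it at its
# default of None.
llama_modeling.LlamaAttention.forward = llama_attention_patch_torch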