Commit
Revert "Fix gradient checkpointing + fp16 autocast for most models" (#…
Browse files Browse the repository at this point in the history
…24420)

Revert "Fix gradient checkpointing + fp16 autocast for most models (#24247)"

This reverts commit 285a480.
younesbelkada authored Jun 22, 2023
1 parent ebb62e8 commit 3ce3385
Showing 179 changed files with 271 additions and 836 deletions.
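
Nearly every file in this revert makes the same pair of changes: the `torch_custom_checkpointing` helper introduced by #24247 disappears from the `...pytorch_utils` imports, and each gradient-checkpointing call site goes back to calling `torch.utils.checkpoint.checkpoint` directly (a few files, like the albert one below, only have the import collapsed back into a single line). The following is a minimal, self-contained sketch of the restored call-site pattern, reconstructed from the hunks below; the toy layer, tensor shapes, and the simplified `custom_forward` body are illustrative stand-ins, not code from the repository:

    import torch
    import torch.utils.checkpoint

    # Toy stand-ins for the objects named in the hunks (illustrative only).
    layer_module = torch.nn.Linear(4, 4)
    hidden_states = torch.randn(2, 4, requires_grad=True)
    attention_mask = None
    output_attentions = False

    def create_custom_forward(module):
        def custom_forward(*inputs):
            # The real models forward extra arguments such as output_attentions here.
            return module(inputs[0])

        return custom_forward

    # checkpoint() stores only the inputs during the forward pass and re-runs
    # custom_forward during backward, trading compute for activation memory.
    layer_outputs = torch.utils.checkpoint.checkpoint(
        create_custom_forward(layer_module),
        hidden_states,
        attention_mask,
    )
    layer_outputs.sum().backward()
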
6 changes: 1 addition & 5 deletions src/transformers/models/albert/modeling_albert.py
@@ -34,11 +34,7 @@
     TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import (
     ModelOutput,
     add_code_sample_docstrings,
9 changes: 2 additions & 7 deletions src/transformers/models/align/modeling_align.py
@@ -30,12 +30,7 @@
     BaseModelOutputWithPoolingAndNoAttention,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-    torch_custom_checkpointing,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -1105,7 +1100,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
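
The helper being dropped here (and in the files below) lived in `...pytorch_utils` and is not shown in this diff. Judging only from its name, the title of the reverted PR, and the fact that every call site passes it exactly the arguments `torch.utils.checkpoint.checkpoint` expects, it was presumably a thin wrapper along these lines; treat this as a hedged reconstruction, not the actual reverted code:

    import torch
    import torch.utils.checkpoint

    def torch_custom_checkpointing(function, *args, **kwargs):
        # Assumption: non-reentrant checkpointing interacts better with autograd
        # under fp16 autocast, which is what the reverted PR was trying to fix.
        return torch.utils.checkpoint.checkpoint(function, *args, use_reentrant=False, **kwargs)

Because the wrapper and `torch.utils.checkpoint.checkpoint` take the same arguments, the revert is mechanical: only the callable changes at each site.
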
11 changes: 3 additions & 8 deletions src/transformers/models/altclip/modeling_altclip.py
@@ -30,12 +30,7 @@
     BaseModelOutputWithPoolingAndProjection,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-    torch_custom_checkpointing,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import ModelOutput, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
 from .configuration_altclip import AltCLIPConfig, AltCLIPTextConfig, AltCLIPVisionConfig
 
@@ -656,7 +651,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
@@ -970,7 +965,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
@@ -25,7 +25,7 @@
 from ...activations import ACT2FN
 from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, SequenceClassifierOutput
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer, torch_custom_checkpointing
+from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_audio_spectrogram_transformer import ASTConfig
 
@@ -343,7 +343,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     layer_head_mask,
5 changes: 2 additions & 3 deletions src/transformers/models/autoformer/modeling_autoformer.py
@@ -34,7 +34,6 @@
     Seq2SeqTSPredictionOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...time_series_utils import NegativeBinomialOutput, NormalOutput, StudentTOutput
 from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
 from .configuration_autoformer import AutoformerConfig
@@ -1211,7 +1210,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -1429,7 +1428,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
5 changes: 2 additions & 3 deletions src/transformers/models/bart/modeling_bart.py
@@ -35,7 +35,6 @@
     Seq2SeqSequenceClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
@@ -850,7 +849,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -1106,7 +1105,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
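
In day-to-day use none of these call sites are invoked directly; the checkpointing flag that guards them is toggled through the model API. A short usage sketch for a BART-style model (the checkpoint name is illustrative, and fp16 autocast is swapped for bf16 when no GPU is available):

    import torch
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    name = "facebook/bart-base"  # illustrative; any BART-family checkpoint exercises the hunks above
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    model.gradient_checkpointing_enable()  # sets the flag guarding the checkpoint() calls
    model.train()                          # the guard also requires training mode

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    batch = tokenizer(["a short test sentence"], return_tensors="pt").to(device)

    # Gradient checkpointing combined with autocast: the combination the reverted PR targeted.
    dtype = torch.float16 if device == "cuda" else torch.bfloat16
    with torch.autocast(device_type=device, dtype=dtype):
        loss = model(**batch, labels=batch["input_ids"]).loss
    loss.backward()
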
4 changes: 2 additions & 2 deletions src/transformers/models/beit/modeling_beit.py
@@ -34,7 +34,7 @@
     SemanticSegmenterOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import find_pruneable_heads_and_indices, meshgrid, prune_linear_layer, torch_custom_checkpointing
+from ...pytorch_utils import find_pruneable_heads_and_indices, meshgrid, prune_linear_layer
 from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
@@ -517,7 +517,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     layer_head_mask,
9 changes: 2 additions & 7 deletions src/transformers/models/bert/modeling_bert.py
@@ -40,12 +40,7 @@
     TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-    torch_custom_checkpointing,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import (
     ModelOutput,
     add_code_sample_docstrings,
@@ -603,7 +598,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
src/transformers/models/bert_generation/modeling_bert_generation.py
@@ -25,12 +25,7 @@
 from ...activations import ACT2FN
 from ...modeling_outputs import BaseModelOutputWithPastAndCrossAttentions, CausalLMOutputWithCrossAttentions
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-    torch_custom_checkpointing,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
@@ -413,7 +408,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
4 changes: 2 additions & 2 deletions src/transformers/models/big_bird/modeling_big_bird.py
@@ -37,7 +37,7 @@
     TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import apply_chunking_to_forward, torch_custom_checkpointing
+from ...pytorch_utils import apply_chunking_to_forward
 from ...utils import (
     ModelOutput,
     add_code_sample_docstrings,
@@ -1622,7 +1622,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -36,7 +36,6 @@
     Seq2SeqSequenceClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
@@ -1946,7 +1945,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -2292,7 +2291,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
3 changes: 1 addition & 2 deletions src/transformers/models/biogpt/modeling_biogpt.py
@@ -32,7 +32,6 @@
     TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
@@ -595,7 +594,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
5 changes: 2 additions & 3 deletions src/transformers/models/blenderbot/modeling_blenderbot.py
@@ -36,7 +36,6 @@
     Seq2SeqModelOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     add_end_docstrings,
     add_start_docstrings,
@@ -780,7 +779,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -1035,7 +1034,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -34,7 +34,6 @@
     Seq2SeqModelOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     add_end_docstrings,
     add_start_docstrings,
@@ -778,7 +777,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -1032,7 +1031,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(decoder_layer),
                     hidden_states,
                     attention_mask,
3 changes: 1 addition & 2 deletions src/transformers/models/blip/modeling_blip.py
@@ -25,7 +25,6 @@
 from ...activations import ACT2FN
 from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -621,7 +620,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
3 changes: 1 addition & 2 deletions src/transformers/models/blip/modeling_blip_text.py
@@ -34,7 +34,6 @@
     find_pruneable_heads_and_indices,
     prune_linear_layer,
 )
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import logging
 from .configuration_blip import BlipTextConfig
 
@@ -428,7 +427,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
11 changes: 3 additions & 8 deletions src/transformers/models/blip_2/modeling_blip_2.py
@@ -31,12 +31,7 @@
     BaseModelOutputWithPoolingAndCrossAttentions,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import (
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-    torch_custom_checkpointing,
-)
+from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import (
     ModelOutput,
     add_start_docstrings,
@@ -497,7 +492,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(encoder_layer),
                     hidden_states,
                     attention_mask,
@@ -968,7 +963,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                layer_outputs = torch_custom_checkpointing(
+                layer_outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(layer_module),
                     hidden_states,
                     attention_mask,
3 changes: 1 addition & 2 deletions src/transformers/models/bloom/modeling_bloom.py
@@ -33,7 +33,6 @@
     TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_custom_checkpointing
 from ...utils import logging
 from .configuration_bloom import BloomConfig
 
@@ -776,7 +775,7 @@ def custom_forward(*inputs):
 
                     return custom_forward
 
-                outputs = torch_custom_checkpointing(
+                outputs = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(block),
                     hidden_states,
                     alibi,
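
One detail worth keeping in mind when reading call sites like the Bloom one above: in the released modeling code these layer loops are typically preceded by a guard that disables the KV cache while checkpointing is active, because cached past key/values cannot be reused when a block is re-run during backward. A paraphrased, self-contained sketch of that guard (wording and the helper name are mine, not taken from this diff):

    import logging

    logger = logging.getLogger(__name__)

    def resolve_use_cache(use_cache: bool, gradient_checkpointing: bool, training: bool) -> bool:
        # Mirrors the guard found near the layer loop in many decoder forward() methods.
        if gradient_checkpointing and training and use_cache:
            logger.warning("`use_cache=True` is incompatible with gradient checkpointing; setting `use_cache=False`.")
            return False
        return use_cache
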
0 comments on commit 3ce3385