From 11ba1d472c61eaacdc58a12e31156d4436b132ce Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Thu, 12 Dec 2024 19:23:28 +0100 Subject: [PATCH] [Init refactor] Modular changes (#35240) * Modular changes * Gemma * Gemma --- src/transformers/models/gemma/__init__.py | 113 ++---------------- .../models/gemma/modeling_flax_gemma.py | 3 + .../models/gemma/modeling_gemma.py | 8 +- .../models/gemma/modular_gemma.py | 6 + .../models/gemma/tokenization_gemma_fast.py | 3 + src/transformers/models/gemma2/__init__.py | 48 ++------ .../models/gemma2/configuration_gemma2.py | 3 + .../models/gemma2/modeling_gemma2.py | 9 ++ .../models/gemma2/modular_gemma2.py | 10 ++ .../models/llava_next_video/__init__.py | 57 ++------- .../configuration_llava_next_video.py | 3 + .../image_processing_llava_next_video.py | 3 + .../modeling_llava_next_video.py | 33 ++--- .../modular_llava_next_video.py | 8 ++ .../processing_llava_next_video.py | 3 + .../models/starcoder2/__init__.py | 51 ++------ .../starcoder2/configuration_starcoder2.py | 3 + .../models/starcoder2/modeling_starcoder2.py | 9 ++ .../models/starcoder2/modular_starcoder2.py | 9 ++ 19 files changed, 129 insertions(+), 253 deletions(-) diff --git a/src/transformers/models/gemma/__init__.py b/src/transformers/models/gemma/__init__.py index 1aafae6e88c2f1..65fb1ca5edef43 100644 --- a/src/transformers/models/gemma/__init__.py +++ b/src/transformers/models/gemma/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# Copyright 2024 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,111 +13,18 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( - OptionalDependencyNotAvailable, - _LazyModule, - is_flax_available, - is_sentencepiece_available, - is_tokenizers_available, - is_torch_available, -) - - -_import_structure = { - "configuration_gemma": ["GemmaConfig"], -} - -try: - if not is_sentencepiece_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["tokenization_gemma"] = ["GemmaTokenizer"] - -try: - if not is_tokenizers_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["tokenization_gemma_fast"] = ["GemmaTokenizerFast"] - - -try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["modeling_gemma"] = [ - "GemmaForCausalLM", - "GemmaModel", - "GemmaPreTrainedModel", - "GemmaForSequenceClassification", - "GemmaForTokenClassification", - ] - -try: - if not is_flax_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["modeling_flax_gemma"] = [ - "FlaxGemmaForCausalLM", - "FlaxGemmaModel", - "FlaxGemmaPreTrainedModel", - ] +from ...utils import _LazyModule +from ...utils.import_utils import define_import_structure if TYPE_CHECKING: - from .configuration_gemma import GemmaConfig - - try: - if not is_sentencepiece_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .tokenization_gemma import GemmaTokenizer - - try: - if not is_tokenizers_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .tokenization_gemma_fast import GemmaTokenizerFast - - try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .modeling_gemma import ( - GemmaForCausalLM, - GemmaForSequenceClassification, - GemmaForTokenClassification, - GemmaModel, - GemmaPreTrainedModel, - ) - - try: - if not is_flax_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .modeling_flax_gemma import ( - FlaxGemmaForCausalLM, - FlaxGemmaModel, - FlaxGemmaPreTrainedModel, - ) - - + from .configuration_gemma import * + from .modeling_flax_gemma import * + from .modeling_gemma import * + from .tokenization_gemma import * + from .tokenization_gemma_fast import * else: import sys - sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) + _file = globals()["__file__"] + sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__) diff --git a/src/transformers/models/gemma/modeling_flax_gemma.py b/src/transformers/models/gemma/modeling_flax_gemma.py index 16291f3c3abe0a..dfe9739ba6555d 100644 --- a/src/transformers/models/gemma/modeling_flax_gemma.py +++ b/src/transformers/models/gemma/modeling_flax_gemma.py @@ -772,3 +772,6 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs): _CONFIG_FOR_DOC, real_checkpoint=_REAL_CHECKPOINT_FOR_DOC, ) + + +__all__ = ["FlaxGemmaForCausalLM", "FlaxGemmaModel", "FlaxGemmaPreTrainedModel"] diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index 52d02995016167..b3253fdd5614e1 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -1295,4 +1295,10 @@ def forward( ) -__all__ = ["GemmaModel", "GemmaForCausalLM", "GemmaForSequenceClassification", "GemmaForTokenClassification"] +__all__ = [ + "GemmaModel", + "GemmaForCausalLM", + "GemmaForSequenceClassification", + "GemmaForTokenClassification", + "GemmaPreTrainedModel", +] diff --git a/src/transformers/models/gemma/modular_gemma.py b/src/transformers/models/gemma/modular_gemma.py index ad1348ae5e3163..778ef7e19b65b6 100644 --- a/src/transformers/models/gemma/modular_gemma.py +++ b/src/transformers/models/gemma/modular_gemma.py @@ -36,6 +36,7 @@ LlamaForSequenceClassification, LlamaForTokenClassification, LlamaModel, + LlamaPreTrainedModel, apply_rotary_pos_emb, repeat_kv, ) @@ -803,6 +804,10 @@ def forward( return outputs +class GemmaPreTrainedModel(LlamaPreTrainedModel): + pass + + class GemmaModel(LlamaModel): def __init__(self, config: GemmaConfig): super().__init__(config) @@ -1040,4 +1045,5 @@ def __init__(self, config): "GemmaForCausalLM", "GemmaForSequenceClassification", "GemmaForTokenClassification", + "GemmaPreTrainedModel", ] diff --git a/src/transformers/models/gemma/tokenization_gemma_fast.py b/src/transformers/models/gemma/tokenization_gemma_fast.py index fd7a979e8b7509..0e6f4a20b6d6d7 100644 --- a/src/transformers/models/gemma/tokenization_gemma_fast.py +++ b/src/transformers/models/gemma/tokenization_gemma_fast.py @@ -197,3 +197,6 @@ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): output = output + bos_token_id + token_ids_1 + eos_token_id return output + + +__all__ = ["GemmaTokenizerFast"] diff --git a/src/transformers/models/gemma2/__init__.py b/src/transformers/models/gemma2/__init__.py index ce59dfd8c7ac5a..18905bac42cc6b 100644 --- a/src/transformers/models/gemma2/__init__.py +++ b/src/transformers/models/gemma2/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# Copyright 2024 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,49 +13,15 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( - OptionalDependencyNotAvailable, - _LazyModule, - is_torch_available, -) +from ...utils import _LazyModule +from ...utils.import_utils import define_import_structure -_import_structure = { - "configuration_gemma2": ["Gemma2Config"], -} - -try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["modeling_gemma2"] = [ - "Gemma2ForCausalLM", - "Gemma2Model", - "Gemma2PreTrainedModel", - "Gemma2ForSequenceClassification", - "Gemma2ForTokenClassification", - ] - if TYPE_CHECKING: - from .configuration_gemma2 import Gemma2Config - - try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .modeling_gemma2 import ( - Gemma2ForCausalLM, - Gemma2ForSequenceClassification, - Gemma2ForTokenClassification, - Gemma2Model, - Gemma2PreTrainedModel, - ) - + from .configuration_gemma2 import * + from .modeling_gemma2 import * else: import sys - sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) + _file = globals()["__file__"] + sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__) diff --git a/src/transformers/models/gemma2/configuration_gemma2.py b/src/transformers/models/gemma2/configuration_gemma2.py index eb562b3a6893bd..dc2eba7893a058 100644 --- a/src/transformers/models/gemma2/configuration_gemma2.py +++ b/src/transformers/models/gemma2/configuration_gemma2.py @@ -153,3 +153,6 @@ def __init__( self.final_logit_softcapping = final_logit_softcapping self.attn_logit_softcapping = attn_logit_softcapping self.cache_implementation = cache_implementation + + +__all__ = ["Gemma2Config"] diff --git a/src/transformers/models/gemma2/modeling_gemma2.py b/src/transformers/models/gemma2/modeling_gemma2.py index 58836a5631c2c0..288913697f2641 100644 --- a/src/transformers/models/gemma2/modeling_gemma2.py +++ b/src/transformers/models/gemma2/modeling_gemma2.py @@ -1280,3 +1280,12 @@ def forward( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) + + +__all__ = [ + "Gemma2ForCausalLM", + "Gemma2Model", + "Gemma2PreTrainedModel", + "Gemma2ForSequenceClassification", + "Gemma2ForTokenClassification", +] diff --git a/src/transformers/models/gemma2/modular_gemma2.py b/src/transformers/models/gemma2/modular_gemma2.py index 7236ae2f5c9f87..5e04fe1b63a362 100644 --- a/src/transformers/models/gemma2/modular_gemma2.py +++ b/src/transformers/models/gemma2/modular_gemma2.py @@ -903,3 +903,13 @@ def __init__(self, config): super().__init__(config) self.model = Gemma2Model(config) self.post_init() + + +__all__ = [ + "Gemma2Config", + "Gemma2ForCausalLM", + "Gemma2Model", + "Gemma2PreTrainedModel", + "Gemma2ForSequenceClassification", + "Gemma2ForTokenClassification", +] diff --git a/src/transformers/models/llava_next_video/__init__.py b/src/transformers/models/llava_next_video/__init__.py index d079643e73e99d..e3632c7a2a1427 100644 --- a/src/transformers/models/llava_next_video/__init__.py +++ b/src/transformers/models/llava_next_video/__init__.py @@ -13,58 +13,17 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available +from ...utils import _LazyModule +from ...utils.import_utils import define_import_structure -_import_structure = { - "configuration_llava_next_video": ["LlavaNextVideoConfig"], - "processing_llava_next_video": ["LlavaNextVideoProcessor"], -} - - -try: - if not is_vision_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["image_processing_llava_next_video"] = ["LlavaNextVideoImageProcessor"] - -try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["modeling_llava_next_video"] = [ - "LlavaNextVideoForConditionalGeneration", - "LlavaNextVideoPreTrainedModel", - ] - if TYPE_CHECKING: - from .configuration_llava_next_video import LlavaNextVideoConfig - from .processing_llava_next_video import LlavaNextVideoProcessor - - try: - if not is_vision_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .image_processing_llava_next_video import LlavaNextVideoImageProcessor - - try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .modeling_llava_next_video import ( - LlavaNextVideoForConditionalGeneration, - LlavaNextVideoPreTrainedModel, - ) - + from .configuration_llava_next_video import * + from .image_processing_llava_next_video import * + from .modeling_llava_next_video import * + from .processing_llava_next_video import * else: import sys - sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure) + _file = globals()["__file__"] + sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__) diff --git a/src/transformers/models/llava_next_video/configuration_llava_next_video.py b/src/transformers/models/llava_next_video/configuration_llava_next_video.py index 2fe889da60336b..e608e5a0d20ece 100644 --- a/src/transformers/models/llava_next_video/configuration_llava_next_video.py +++ b/src/transformers/models/llava_next_video/configuration_llava_next_video.py @@ -158,3 +158,6 @@ def __init__( self.text_config = text_config super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) + + +__all__ = ["LlavaNextVideoConfig"] diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py index 59d0d9d9447252..f30e2c54fe90a3 100644 --- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py @@ -414,3 +414,6 @@ def preprocess( data = {"pixel_values_videos": pixel_values} return BatchFeature(data=data, tensor_type=return_tensors) + + +__all__ = ["LlavaNextVideoImageProcessor"] diff --git a/src/transformers/models/llava_next_video/modeling_llava_next_video.py b/src/transformers/models/llava_next_video/modeling_llava_next_video.py index b0a20d6c5ccd93..7cd7e18abaf3e0 100644 --- a/src/transformers/models/llava_next_video/modeling_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modeling_llava_next_video.py @@ -122,21 +122,6 @@ def forward(self, image_features): return image_features_spatial_pool.flatten(2).transpose(1, 2).contiguous() -class LlavaNextVideoMultiModalProjector(nn.Module): - def __init__(self, config: LlavaNextVideoConfig): - super().__init__() - - self.linear_1 = nn.Linear(config.vision_config.hidden_size, config.text_config.hidden_size, bias=True) - self.act = ACT2FN[config.projector_hidden_act] - self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True) - - def forward(self, image_features): - hidden_states = self.linear_1(image_features) - hidden_states = self.act(hidden_states) - hidden_states = self.linear_2(hidden_states) - return hidden_states - - LLAVA_NEXT_VIDEO_START_DOCSTRING = r""" This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads @@ -191,6 +176,21 @@ def _init_weights(self, module): module.weight.data[module.padding_idx].zero_() +class LlavaNextVideoMultiModalProjector(nn.Module): + def __init__(self, config: LlavaNextVideoConfig): + super().__init__() + + self.linear_1 = nn.Linear(config.vision_config.hidden_size, config.text_config.hidden_size, bias=True) + self.act = ACT2FN[config.projector_hidden_act] + self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True) + + def forward(self, image_features): + hidden_states = self.linear_1(image_features) + hidden_states = self.act(hidden_states) + hidden_states = self.linear_2(hidden_states) + return hidden_states + + def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): """ Calculate the shape of the image patch grid after the preprocessing for images of any resolution. @@ -1157,3 +1157,6 @@ def get_video_features( video_features = self.multi_modal_projector(video_features) video_features = torch.split(video_features, frames, dim=0) return video_features + + +__all__ = ["LlavaNextVideoForConditionalGeneration", "LlavaNextVideoPreTrainedModel"] diff --git a/src/transformers/models/llava_next_video/modular_llava_next_video.py b/src/transformers/models/llava_next_video/modular_llava_next_video.py index 3d6431d7ea29ba..94c1432a41b1f1 100644 --- a/src/transformers/models/llava_next_video/modular_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modular_llava_next_video.py @@ -24,6 +24,7 @@ from transformers.models.llava_next.modeling_llava_next import ( LlavaNextCausalLMOutputWithPast, LlavaNextForConditionalGeneration, + LlavaNextPreTrainedModel, image_size_to_num_patches, ) @@ -218,6 +219,10 @@ def forward(self, image_features): return image_features_spatial_pool.flatten(2).transpose(1, 2).contiguous() +class LlavaNextVideoPreTrainedModel(LlavaNextPreTrainedModel): + pass + + class LlavaNextVideoForConditionalGeneration(LlavaNextForConditionalGeneration): def __init__(self, config: LlavaNextVideoConfig, **super_kwargs): super().__init__(config, **super_kwargs) @@ -641,3 +646,6 @@ def prepare_inputs_for_generation( model_inputs["image_sizes"] = image_sizes return model_inputs + + +__all__ = ["LlavaNextVideoConfig", "LlavaNextVideoForConditionalGeneration", "LlavaNextVideoPreTrainedModel"] diff --git a/src/transformers/models/llava_next_video/processing_llava_next_video.py b/src/transformers/models/llava_next_video/processing_llava_next_video.py index 65195b77240721..857ee28a080041 100644 --- a/src/transformers/models/llava_next_video/processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/processing_llava_next_video.py @@ -291,3 +291,6 @@ def model_input_names(self): tokenizer_input_names = self.tokenizer.model_input_names image_processor_input_names = self.image_processor.model_input_names return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names)) + + +__all__ = ["LlavaNextVideoProcessor"] diff --git a/src/transformers/models/starcoder2/__init__.py b/src/transformers/models/starcoder2/__init__.py index d9dc2cd1e5001c..6349255ed3a475 100644 --- a/src/transformers/models/starcoder2/__init__.py +++ b/src/transformers/models/starcoder2/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 BigCode and The HuggingFace Inc. team. All rights reserved. +# Copyright 2024 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,52 +13,15 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( - OptionalDependencyNotAvailable, - _LazyModule, - is_torch_available, -) - - -_import_structure = { - "configuration_starcoder2": ["Starcoder2Config"], -} - - -try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - pass -else: - _import_structure["modeling_starcoder2"] = [ - "Starcoder2ForCausalLM", - "Starcoder2Model", - "Starcoder2PreTrainedModel", - "Starcoder2ForSequenceClassification", - "Starcoder2ForTokenClassification", - ] +from ...utils import _LazyModule +from ...utils.import_utils import define_import_structure if TYPE_CHECKING: - from .configuration_starcoder2 import Starcoder2Config - - try: - if not is_torch_available(): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - pass - else: - from .modeling_starcoder2 import ( - Starcoder2ForCausalLM, - Starcoder2ForSequenceClassification, - Starcoder2ForTokenClassification, - Starcoder2Model, - Starcoder2PreTrainedModel, - ) - - + from .configuration_starcoder2 import * + from .modeling_starcoder2 import * else: import sys - sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) + _file = globals()["__file__"] + sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__) diff --git a/src/transformers/models/starcoder2/configuration_starcoder2.py b/src/transformers/models/starcoder2/configuration_starcoder2.py index 5749eb68358468..7f21d1f12d8b22 100644 --- a/src/transformers/models/starcoder2/configuration_starcoder2.py +++ b/src/transformers/models/starcoder2/configuration_starcoder2.py @@ -197,3 +197,6 @@ def __init__( eos_token_id=eos_token_id, **kwargs, ) + + +__all__ = ["Starcoder2Config"] diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index eb218accdb8c03..8047e23bb05bd8 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -1324,3 +1324,12 @@ def forward( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) + + +__all__ = [ + "Starcoder2ForCausalLM", + "Starcoder2Model", + "Starcoder2PreTrainedModel", + "Starcoder2ForSequenceClassification", + "Starcoder2ForTokenClassification", +] diff --git a/src/transformers/models/starcoder2/modular_starcoder2.py b/src/transformers/models/starcoder2/modular_starcoder2.py index a1cec871baca28..013c8e472b325d 100644 --- a/src/transformers/models/starcoder2/modular_starcoder2.py +++ b/src/transformers/models/starcoder2/modular_starcoder2.py @@ -544,3 +544,12 @@ class Starcoder2ForSequenceClassification(LlamaForSequenceClassification): class Starcoder2ForTokenClassification(LlamaForTokenClassification): pass + + +__all__ = [ + "Starcoder2ForCausalLM", + "Starcoder2Model", + "Starcoder2PreTrainedModel", + "Starcoder2ForSequenceClassification", + "Starcoder2ForTokenClassification", +]