diff --git a/llmfoundry/command_utils/train.py b/llmfoundry/command_utils/train.py
index 3b94e57a6c..46d7e2c61a 100644
--- a/llmfoundry/command_utils/train.py
+++ b/llmfoundry/command_utils/train.py
@@ -65,11 +65,6 @@
 
 log = logging.getLogger(__name__)
 
-ic.configureOutput(includeContext=True)
-install()
-
-ic.disable()
-
 def validate_config(train_config: TrainConfig):
     """Validates compatible model and dataloader selection."""
     # Validate the rest of the config
diff --git a/llmfoundry/models/layers/blocks.py b/llmfoundry/models/layers/blocks.py
index cf07e453fa..c88cf33d1b 100644
--- a/llmfoundry/models/layers/blocks.py
+++ b/llmfoundry/models/layers/blocks.py
@@ -206,7 +206,6 @@ def forward(
                 m = self.norm_2(x)
 
         n = self.apply_ffn(attention_mask, m)
-        ic(x.shape, x.device, m.shape, m.device, n.shape, n.device)
         # In the following line we move the `x` tensor to the same devices as the output of ffn layer. This operation should be a no-op during training.
         # This is done to fix pipeline parallel generation using hf.generate. Please see this comment for details: https://github.com/mosaicml/llm-foundry/pull/1332#issue-2386827204
         x = x.to(device=n.device,
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 770af5a9cf..e3f6f0575e 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -966,7 +966,6 @@ def forward(
             if prev_layer_key_value is not None:
                 extra_kwargs['prev_layer_key_value'] = prev_layer_key_value
 
-            ic(type(x), type(past_key_value), type(attn_bias), type(attention_mask))
             x, attn_weights, present = block(
                 x,
                 past_key_value=past_key_value,
@@ -1144,7 +1143,6 @@ def forward(
             use_cache if use_cache is not None else self.config.use_cache
         )
 
-        ic(type(input_ids))
         outputs = self.transformer(
             input_ids=input_ids,
             past_key_values=past_key_values,
@@ -1157,7 +1155,6 @@ def forward(
             inputs_embeds=inputs_embeds,
             position_ids=position_ids,
         )
-        ic(outputs)
 
         if self.lm_head is not None:
             logits = self.lm_head(outputs.last_hidden_state)
diff --git a/llmfoundry/models/utils/tp_strategy.py b/llmfoundry/models/utils/tp_strategy.py
index 158daa6425..35c87b86dd 100644
--- a/llmfoundry/models/utils/tp_strategy.py
+++ b/llmfoundry/models/utils/tp_strategy.py
@@ -17,7 +17,6 @@ def ffn_tp_strategy(model: ComposerModel) -> dict[str, ParallelStyle]:
     # generate layer plan
     layer_plan: dict[str, ParallelStyle] = {}
     for name, _ in model.named_modules():
-        ic(name)
     if name.split('.')[-2:] == ['ffn', 'up_proj']:
            layer_plan[name] = ColwiseParallel(
                input_layouts = Replicate(),
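
The diff above is purely a cleanup: it strips leftover icecream (`ic`) debug instrumentation from the library code instead of leaving it disabled in place. For reviewers unfamiliar with the pattern, here is a minimal sketch of what the removed calls do, assuming the bare `install()` in train.py is icecream's `install()`; the sample variables are hypothetical and not taken from llm-foundry.

```python
# Minimal sketch of the icecream debug pattern removed in this diff.
# Assumes `install()` refers to icecream.install(); variable names are illustrative.
from icecream import ic, install

install()                                # make ic() available as a builtin in every module
ic.configureOutput(includeContext=True)  # prefix each line with the call site (file, line, function)

batch = {'input_ids': [1, 2, 3]}
ic(type(batch), len(batch['input_ids']))  # prints each expression alongside its value

ic.disable()                              # silence all ic() calls globally
ic(type(batch))                           # produces no output once disabled
```

Removing the calls outright, rather than relying on `ic.disable()`, keeps the debug-only instrumentation out of the model's forward pass and the TP planning code entirely.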