diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index aae1204acf488c..622080d413573b 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -241,7 +241,7 @@ def trainer_config_finalize(self, args, model, num_training_steps): # automatically assign the optimal config values based on model config self.fill_only( "zero_optimization.stage3_prefetch_bucket_size", - 0.9 * hidden_size * hidden_size, + int(0.9 * hidden_size * hidden_size), ) self.fill_only( "zero_optimization.stage3_param_persistence_threshold",