From 20f6c3f58cad8055fec3e7614cfc64ade437babd Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Thu, 26 Oct 2023 19:42:32 +0000
Subject: [PATCH 1/5] Fix memory leak

---
 src/accelerate/utils/transformer_engine.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/accelerate/utils/transformer_engine.py b/src/accelerate/utils/transformer_engine.py
index a6342d7150f..b39d8efeaf3 100644
--- a/src/accelerate/utils/transformer_engine.py
+++ b/src/accelerate/utils/transformer_engine.py
@@ -36,15 +36,15 @@ def convert_model(model, to_transformer_engine=True, _convert_linear=True, _conv
             te_module = te.Linear(
                 module.in_features, module.out_features, bias=has_bias, params_dtype=module.weight.dtype
             )
-            te_module.weight.data = module.weight.data.clone()
+            module.weight.copy_(te_module.weight)
             if has_bias:
-                te_module.bias.data = module.bias.data.clone()
+                module.bias.copy_(te_module.bias)
 
             setattr(model, name, te_module)
         elif isinstance(module, nn.LayerNorm) and to_transformer_engine and _convert_ln:
             te_module = te.LayerNorm(module.normalized_shape[0], eps=module.eps, params_dtype=module.weight.dtype)
-            te_module.weight.data = module.weight.data.clone()
-            te_module.bias.data = module.bias.data.clone()
+            module.weight.copy_(te_module.weight)
+            module.bias.copy_(te_module.bias)
 
             setattr(model, name, te_module)
         elif isinstance(module, te.Linear) and not to_transformer_engine and _convert_linear:
@@ -52,15 +52,15 @@ def convert_model(model, to_transformer_engine=True, _convert_linear=True, _conv
             new_module = nn.Linear(
                 module.in_features, module.out_features, bias=has_bias, params_dtype=module.weight.dtype
             )
-            new_module.weight.data = module.weight.data.clone()
+            module.weight.copy_(new_module.weight)
             if has_bias:
-                new_module.bias.data = module.bias.data.clone()
+                module.bias.copy_(new_module.bias)
 
             setattr(model, name, new_module)
         elif isinstance(module, te.LayerNorm) and not to_transformer_engine and _convert_ln:
             new_module = nn.LayerNorm(module.normalized_shape[0], eps=module.eps, params_dtype=module.weight.dtype)
-            new_module.weight.data = module.weight.data.clone()
-            new_module.bias.data = module.bias.data.clone()
+            module.weight.copy_(new_module.weight)
+            module.bias.copy_(new_module.bias)
 
             setattr(model, name, new_module)
         else:

From b05da0d07a5f65583f0880bc8970d2b15c5bcfa7 Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Thu, 26 Oct 2023 19:45:10 +0000
Subject: [PATCH 2/5] Change when model is moved to cuda

---
 src/accelerate/accelerator.py | 58 +++++++++++++++++------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index 3bd420f8f7b..242f6401b7f 100755
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -1357,35 +1357,6 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
                 " Please rerun your script specifying `--num_processes=1` or by launching with `python {{myscript.py}}`."
             )
 
-        if (getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False)) and getattr(
-            model, "hf_device_map", False
-        ):
-            model_devices = set(model.hf_device_map.values())
-            if len(model_devices) > 1 and self.distributed_type != DistributedType.NO:
-                raise ValueError(
-                    "You can't train a model that has been loaded in 8-bit precision on multiple devices in any distributed mode."
-                    " In order to use 8-bit models that have been loaded across multiple GPUs the solution is to use Naive Pipeline Parallelism."
-                    " Therefore you should not specify that you are under any distributed regime in your accelerate config."
-                )
-            current_device = list(model_devices)[0]
-            current_device_index = current_device.index if isinstance(current_device, torch.device) else current_device
-
-            if torch.device(current_device_index) != self.device:
-                # if on the first device (GPU 0) we don't care
-                if (self.device.index is not None) or (current_device_index != 0):
-                    raise ValueError(
-                        "You can't train a model that has been loaded in 8-bit precision on a different device than the one "
-                        "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device()}"
-                        "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device() or device_map={'':torch.xpu.current_device()}"
-                    )
-
-            if "cpu" in model_devices or "disk" in model_devices:
-                raise ValueError(
-                    "You can't train a model that has been loaded in 8-bit precision with CPU or disk offload."
-                )
-        elif device_placement and not self.verify_device_map(model):
-            model = model.to(self.device)
-
         if self.native_amp:
             model._original_forward = model.forward
             model_forward_func = model.forward.__func__ if hasattr(model.forward, "__func__") else model.forward
@@ -1416,6 +1387,35 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
                     "or higher, compute capability of 8.9 or higher). Will use FP16 instead."
                 )
             model.forward = fp8_autocast(enabled=fp8_enabled, fp8_recipe=fp8_recipe)(model.forward)
+
+        if (getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False)) and getattr(
+            model, "hf_device_map", False
+        ):
+            model_devices = set(model.hf_device_map.values())
+            if len(model_devices) > 1 and self.distributed_type != DistributedType.NO:
+                raise ValueError(
+                    "You can't train a model that has been loaded in 8-bit precision on multiple devices in any distributed mode."
+                    " In order to use 8-bit models that have been loaded across multiple GPUs the solution is to use Naive Pipeline Parallelism."
+                    " Therefore you should not specify that you are under any distributed regime in your accelerate config."
+                )
+            current_device = list(model_devices)[0]
+            current_device_index = current_device.index if isinstance(current_device, torch.device) else current_device
+
+            if torch.device(current_device_index) != self.device:
+                # if on the first device (GPU 0) we don't care
+                if (self.device.index is not None) or (current_device_index != 0):
+                    raise ValueError(
+                        "You can't train a model that has been loaded in 8-bit precision on a different device than the one "
+                        "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device()}"
+                        "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device() or device_map={'':torch.xpu.current_device()}"
+                    )
+
+            if "cpu" in model_devices or "disk" in model_devices:
+                raise ValueError(
+                    "You can't train a model that has been loaded in 8-bit precision with CPU or disk offload."
+                )
+        elif device_placement and not self.verify_device_map(model):
+            model = model.to(self.device)
         if not evaluation_mode:
             if self.distributed_type in (
                 DistributedType.MULTI_GPU,

From f8f9b2fbbe6b266ebd819d37dcbb98a8e8813721 Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Fri, 27 Oct 2023 12:58:56 +0000
Subject: [PATCH 3/5] Add from PR

---
 src/accelerate/accelerator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index 242f6401b7f..cc34cd5cb1c 100755
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -1406,7 +1406,6 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
                 if (self.device.index is not None) or (current_device_index != 0):
                     raise ValueError(
                         "You can't train a model that has been loaded in 8-bit precision on a different device than the one "
-                        "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device()}"
                         "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device() or device_map={'':torch.xpu.current_device()}"
                     )
 

From bc58807b14555df212041ed0dc38e7b3bd5c7acf Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Fri, 27 Oct 2023 16:52:03 +0000
Subject: [PATCH 4/5] Remove link

---
 src/accelerate/accelerator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index cc34cd5cb1c..5bb96d8c224 100755
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -1372,7 +1372,7 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
                 with torch.no_grad():
                     convert_model(model)
                 model._converted_to_transformer_engine = True
-            model._original_forward = model.forward
+            #model._original_forward = model.forward
 
             kwargs = self.fp8_recipe_handler.to_kwargs() if self.fp8_recipe_handler is not None else {}
             if "fp8_format" in kwargs:

From d1df551835c14e98b066d41f9b527005c49e964f Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Tue, 31 Oct 2023 09:08:42 -0400
Subject: [PATCH 5/5] Undo original forward link

---
 src/accelerate/accelerator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index 5bb96d8c224..cc34cd5cb1c 100755
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -1372,7 +1372,7 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
                 with torch.no_grad():
                     convert_model(model)
                 model._converted_to_transformer_engine = True
-            #model._original_forward = model.forward
+            model._original_forward = model.forward
 
             kwargs = self.fp8_recipe_handler.to_kwargs() if self.fp8_recipe_handler is not None else {}
             if "fp8_format" in kwargs:
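
For context on PATCH 1/5: it swaps `.data = ....clone()` assignments for in-place `copy_()` calls when transferring weights between an existing module and its replacement. The snippet below is a minimal, self-contained sketch of the allocation difference between the two idioms, not the accelerate code itself; `src` and `dst` are hypothetical stand-in modules, not names from the codebase.

import torch
import torch.nn as nn

src = nn.Linear(1024, 1024)  # module whose weights should be carried over
dst = nn.Linear(1024, 1024)  # freshly constructed replacement module

# Idiom removed by the patch: clone() allocates a brand-new tensor for every
# parameter and rebinds dst.weight.data to it, so peak memory briefly holds
# two full copies of each weight while a model is being converted.
dst.weight.data = src.weight.data.clone()

# Idiom introduced by the patch: copy_() writes into the storage the destination
# parameter already owns, so no extra tensor is allocated during the transfer.
with torch.no_grad():
    dst.weight.copy_(src.weight)
    dst.bias.copy_(src.bias)

The torch.no_grad() guard mirrors how convert_model is already invoked in accelerator.py (visible in the context lines of PATCH 4/5 and 5/5), and it is required here because copy_() on a leaf parameter that requires grad would otherwise raise an error.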