diff --git a/src/accelerate/hooks.py b/src/accelerate/hooks.py
index b9bc2a86215..a42a64e3e94 100644
--- a/src/accelerate/hooks.py
+++ b/src/accelerate/hooks.py
@@ -156,16 +156,17 @@ def add_hook_to_module(module: nn.Module, hook: ModelHook, append: bool = False)
     module._hf_hook = hook
 
     @functools.wraps(old_forward)
-    def new_forward(*args, **kwargs):
-        args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
-        if module._hf_hook.no_grad:
+    def new_forward(self, *args, **kwargs):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+        if self._hf_hook.no_grad:
             with torch.no_grad():
-                output = old_forward(*args, **kwargs)
+                output = self._old_forward(*args, **kwargs)
         else:
-            output = old_forward(*args, **kwargs)
-        return module._hf_hook.post_forward(module, output)
+            output = self._old_forward(*args, **kwargs)
+        return self._hf_hook.post_forward(self, output)
+
+    module.forward = new_forward.__get__(module)
 
-    module.forward = new_forward
 
     return module
 
diff --git a/tests/test_big_modeling.py b/tests/test_big_modeling.py
index a4f6a671be3..d8d31d57651 100644
--- a/tests/test_big_modeling.py
+++ b/tests/test_big_modeling.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import copy
 import os
 import unittest
 from tempfile import TemporaryDirectory
@@ -45,6 +45,18 @@ def forward(self, x):
         return self.linear2(self.batchnorm(self.linear1(x)))
 
 
+class ModelForTestCopy(nn.Module):
+    def __init__(self, id: int):
+        super().__init__()
+        self.id = id
+        self.linear1 = nn.Linear(3, 4)
+        self.batchnorm = nn.BatchNorm1d(4)
+        self.linear2 = nn.Linear(4, 5)
+
+    def forward(self, x):
+        return self.linear2(self.batchnorm(self.linear1(x))), self.id
+
+
 class ModelForTestTiedWeights(nn.Module):
     def __init__(self):
         super().__init__()
@@ -325,6 +337,24 @@ def test_dispatch_model_multi_gpu(self):
         output = model(x)
         self.assertTrue(torch.allclose(expected, output.cpu(), atol=1e-5))
 
+    @require_cuda
+    def test_dispatch_model_copy(self):
+        original_model = ModelForTestCopy(id=1)
+        device_map = {"linear1": 0, "batchnorm": "cpu", "linear2": 0}
+
+        x = torch.randn(2, 3)
+        expected, original_output_id = original_model(x)
+
+        dispatch_model(original_model, device_map)
+
+        copied_model = copy.deepcopy(original_model)
+        copied_model.id = 2
+        output, copied_output_id = copied_model(x)
+
+        self.assertEqual(original_model.id, original_output_id)
+        self.assertEqual(copied_model.id, copied_output_id)
+        self.assertTrue(torch.allclose(expected, output.cpu(), atol=1e-5))
+
     @require_cuda
     def test_dispatch_model_move_offloaded_model(self):
         model = ModelForTest()
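
Context on why the hook is now attached with `__get__`: the old closure-based `new_forward` captured the original `module` and `old_forward`, so after `copy.deepcopy` of a dispatched model the copy's `forward` still ran against the original instance. Binding `new_forward` as a method stores a bound method in the instance `__dict__`, and `copy.deepcopy` re-binds bound methods to the deep-copied instance, which is what the new `test_dispatch_model_copy` exercises. The snippet below is a minimal, self-contained sketch of that binding trick only; `TinyModel` and `attach_wrapper` are illustrative stand-ins, not Accelerate APIs, and the wrapper does none of the real hook's pre/post-forward work.

import copy
import functools

import torch
from torch import nn


class TinyModel(nn.Module):
    # Illustrative toy module (not part of Accelerate).
    def __init__(self, tag: int):
        super().__init__()
        self.tag = tag
        self.linear = nn.Linear(3, 3)

    def forward(self, x):
        return self.linear(x), self.tag


def attach_wrapper(module: nn.Module) -> nn.Module:
    # Hypothetical stand-in for add_hook_to_module: patch `forward` with a
    # wrapper that resolves everything off `self` instead of a closed-over module.
    module._old_forward = module.forward

    @functools.wraps(module.forward)
    def new_forward(self, *args, **kwargs):
        # `self` is whichever instance this bound method is attached to.
        return self._old_forward(*args, **kwargs)

    # __get__ turns the function into a bound method stored on the instance;
    # copy.deepcopy re-creates bound methods against the deep-copied instance,
    # so the patched forward follows the copy instead of the original.
    module.forward = new_forward.__get__(module)
    return module


model = attach_wrapper(TinyModel(tag=1))
clone = copy.deepcopy(model)
clone.tag = 2

x = torch.randn(2, 3)
print(model(x)[1])  # 1 -> wrapper resolves attributes on the original
print(clone(x)[1])  # 2 -> wrapper resolves attributes on the copy

With a plain closure instead of the bound method, the last line would print 1, because functions are copied by reference and the closure would still point at the original module.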