[Misc] Improve BNB loader to handle mixture of sharded and merged weights with same suffix (#11566)

Signed-off-by: Isotr0py <[email protected]>
vllm-project · Dec 27, 2024 · dde1fa1 · dde1fa1
1 parent 0240402
commit dde1fa1
Showing 1 changed file with 5 additions and 2 deletions.
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
@@ -1001,8 +1001,11 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
                     for sub_name in sub_modules:
                         self.target_modules.append(
                             name.replace(last_name, sub_name))
-                else:
-                    self.target_modules.append(name)
+                # Add the original module name even if the module has a stacked
+                # map, in case the model has a mixture of disk-merged and
+                # disk-split weights with the same last name.
+                self.target_modules.append(name)
+
         assert (self.target_modules
                 ), "vllm currently does not support BNB quantization for"
         f" {type(model).__name__}"