diff --git a/optimum/gptq/constants.py b/optimum/gptq/constants.py
index 2d3e51da7a..701868a3b8 100644
--- a/optimum/gptq/constants.py
+++ b/optimum/gptq/constants.py
@@ -18,6 +18,10 @@
     "model.decoder.layers",
     "gpt_neox.layers",
     "model.layers",
+    # modules loaded by AutoModel vs AutoModelForCausalLM have different prefixes
+    "h",
+    "decoder.layers",
+    "layers",
 ]
 
 GPTQ_CONFIG = "quantize_config.json"
diff --git a/optimum/gptq/utils.py b/optimum/gptq/utils.py
index 732ecbd66b..c32f364d2f 100644
--- a/optimum/gptq/utils.py
+++ b/optimum/gptq/utils.py
@@ -72,7 +72,9 @@ def get_block_name_with_pattern(model: nn.Module):
     modules_names = [n for n, _ in model.named_modules()]
     for pattern_candidate in BLOCK_PATTERNS:
         pattern_candidate = pattern_candidate
-        if any(pattern_candidate in name for name in modules_names):
+        # Match whole dotted-path components: a bare prefix test would let short
+        # patterns such as "h" or "layers" match unrelated modules (e.g. "head").
+        if any(name == pattern_candidate or name.startswith(pattern_candidate + ".") for name in modules_names):
             return pattern_candidate
     raise ValueError("Block pattern could not be match. Pass `block_name_to_quantize` argument in `quantize_model`")
 