fixed bug in peft installation for gptqmodel #81

Merged
------------------------------------------------------------------------
@@ -32,6 +32,18 @@
 # these parameters are to be patched for triton v2
 # consider making a map if patching more kernels
 PATCH_FOR_FSDP_TRITON_V2 = ["qweight", "qzeros"]
+PEFT_ALL_LINEAR = "all-linear"
+
+
+def requires_installation_on_all_linears(peft_config):
+    tm = peft_config.target_modules
+    assert isinstance(tm, (list, str)), "target modules can only be list or string"
+    if isinstance(tm, list):
+        if PEFT_ALL_LINEAR not in tm:
+            return False
+        assert len(tm) == 1, f"`{PEFT_ALL_LINEAR}` must exist alone in target modules"

Review thread on the assert len(tm) == 1 check above:

ahh interesting. So we don't allow someone to pass target_modules as ["all_linear", "lm_head"]? We allow this in our doc for LoRA, as HF allows it for LoRA: https://github.com/foundation-model-stack/fms-hf-tuning?tab=readme-ov-file#lora-tuning-example (see the section "How to specify lm_head as a target module", example 3). I can update the doc for qLoRA, or should we keep it consistent with LoRA and allow both values?

Up to you! I will approve the PR so it is not blocked on me for merging in this release, but we can open new PRs to amend this behavior.

Contributor
@aluu317 in your doc, I understand ["all_linear", "lm_head"] to be equivalent to ["all_linear"], is that correct? That is, does all-linear by default include the lm_head as well?

No, what that doc means is that running ["all_linear", "lm_head"] is the same as running ["all_linear"] only; lm_head will not get adapters either way.

Contributor
Sorry, I am not clear: are you saying ["all_linear", "lm_head"] will not install adapters onto lm_head?

+        return True
+    return tm == PEFT_ALL_LINEAR
 
 
 def build_patch_to_view_tensor_to_parameter_for_fsdp_gptq(
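
To make the helper's behavior concrete, here is a minimal, self-contained sketch. SimpleNamespace stands in for a peft LoraConfig (only the target_modules attribute matters here), and the helper body is restated from the hunk above so the snippet runs on its own:

from types import SimpleNamespace

PEFT_ALL_LINEAR = "all-linear"


def requires_installation_on_all_linears(peft_config):
    tm = peft_config.target_modules
    assert isinstance(tm, (list, str)), "target modules can only be list or string"
    if isinstance(tm, list):
        if PEFT_ALL_LINEAR not in tm:
            return False
        assert len(tm) == 1, f"`{PEFT_ALL_LINEAR}` must exist alone in target modules"
        return True
    return tm == PEFT_ALL_LINEAR


# the plain string and the single-element list both request adapters on all linear layers
assert requires_installation_on_all_linears(SimpleNamespace(target_modules="all-linear"))
assert requires_installation_on_all_linears(SimpleNamespace(target_modules=["all-linear"]))

# an explicit module list means: do not auto-discover linear layers
assert not requires_installation_on_all_linears(
    SimpleNamespace(target_modules=["q_proj", "v_proj"])
)

# mixing "all-linear" with other names trips the assert discussed in the thread above
try:
    requires_installation_on_all_linears(
        SimpleNamespace(target_modules=["all-linear", "lm_head"])
    )
except AssertionError as err:
    print(err)  # `all-linear` must exist alone in target modules

The assert mirrors the thread above: "all-linear" is accepted only on its own, not mixed with other module names.
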
------------------------------------------------------------------------
@@ -33,7 +33,10 @@
 import torch.distributed
 
 # Local
-from .autogptq_utils import register_tensors_as_parameters_patch_rule
+from .autogptq_utils import (
+    register_tensors_as_parameters_patch_rule,
+    requires_installation_on_all_linears,
+)
 
 
 class AutoGPTQAccelerationPlugin(AccelerationPlugin):
@@ -318,7 +321,7 @@ def augmentation(
         model = get_gptq_peft_model(
             model,
             peft_config=peft_config,
-            auto_find_all_linears=peft_config.target_modules is None,
+            auto_find_all_linears=requires_installation_on_all_linears(peft_config),
             train_mode=True,  # install adapters for training
         )
         modifiable_args = (None,)  # return a None for peft_config
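
For a quick sense of the change, a small runnable sketch of the old versus new value of auto_find_all_linears when a user explicitly requests "all-linear" (SimpleNamespace again stands in for the peft config):

from types import SimpleNamespace

cfg = SimpleNamespace(target_modules="all-linear")  # stand-in for a LoraConfig

# old behavior: linear layers were auto-discovered only when no target modules
# were set at all, so an explicit "all-linear" request never triggered discovery
old_auto_find = cfg.target_modules is None
assert old_auto_find is False

# new behavior: the decision is delegated to requires_installation_on_all_linears
# (added in the first hunk and restated in the sketch above), which returns True here
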
------------------------------------------------------------------------
@@ -35,7 +35,7 @@
 
 # Local
 from ..models.base import BaseGPTQModel
-from ..nn_modules.qlinear.qlinear_tritonv2 import QuantLinear as QuantLinearTriton
+from ..nn_modules.qlinear import BaseQuantLinear
 
 
 class GPTQLoraConfig(LoraConfig):
@@ -61,7 +61,7 @@ def _create_new_module(
     lora_config: LoraConfig,
     adapter_name: str,
     target: torch.nn.Module,
-    target_cls: torch.nn.Module = QuantLinearTriton,
+    target_cls: torch.nn.Module = BaseQuantLinear,
     **kwargs,
 ):
     # if the base layer module matches a supported class, dispatch the lora linear
@@ -92,12 +92,12 @@ def find_all_linear_names(
 ):
     if not ignore:
         ignore = []
-    lm_head_name = model.lm_head_name
+    lm_head_name = model.lm_head
     if ignore_lm_head and lm_head_name not in ignore:
         ignore.append(lm_head_name)
     results = set()
     for n, m in model.named_modules():
-        if isinstance(m, torch.nn.Linear):
+        if isinstance(m, BaseQuantLinear):
             res = n.split(".")[-1]
             if res not in ignore:
                 results.add(res)
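
The name-collection pattern in find_all_linear_names is easiest to see on a toy model. The sketch below (the helper name is illustrative) matches plain torch.nn.Linear, whereas the fixed code matches BaseQuantLinear, since in a GPTQ-quantized model the linear layers are quantized modules rather than torch.nn.Linear instances:

import torch


def find_linear_module_names(model, ignore=("lm_head",)):
    # collect the last path component of every matching submodule, e.g. "q_proj"
    results = set()
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):  # real code: isinstance(module, BaseQuantLinear)
            leaf = name.split(".")[-1]
            if leaf not in ignore:
                results.add(leaf)
    return sorted(results)


toy = torch.nn.ModuleDict(
    {
        "attn": torch.nn.ModuleDict(
            {"q_proj": torch.nn.Linear(8, 8), "v_proj": torch.nn.Linear(8, 8)}
        ),
        "lm_head": torch.nn.Linear(8, 32),
    }
)
print(find_linear_module_names(toy))  # ['q_proj', 'v_proj']
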
@@ -127,6 +127,7 @@ def get_gptq_peft_model(
     adapter_name: str = "default",
     auto_find_all_linears: bool = True,
     train_mode: bool = False,
+    ignore_lm_head=True,
 ):
     if train_mode and not peft_config:
         raise ValueError("peft_config not specified when in train mode.")
@@ -142,7 +143,7 @@
     if peft_type in [PeftType.LORA.value]:
         if auto_find_all_linears:
             peft_config.target_modules = find_all_linear_names(
-                model, ignore_lm_head=True
+                model, ignore_lm_head=ignore_lm_head
             )
     if peft_type == PeftType.LORA.value and not isinstance(
         peft_config, GPTQLoraConfig