diff --git a/plugins/accelerated-peft/pyproject.toml b/plugins/accelerated-peft/pyproject.toml
index a2b2f12e..a5d2bc59 100644
--- a/plugins/accelerated-peft/pyproject.toml
+++ b/plugins/accelerated-peft/pyproject.toml
@@ -32,7 +32,7 @@ auto_gptq = ["auto_gptq @ git+https://github.com/AutoGPTQ/AutoGPTQ.git"]
 files = ["requirements.txt"]
 
 [tool.hatch.build.targets.wheel]
-only-include = ["src/"]
+only-include = ["src/fms_acceleration_peft"]
 
 [tool.hatch.metadata]
 allow-direct-references = true
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
index 63254c25..efa82c45 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
@@ -47,7 +47,10 @@ def __init__(self, configurations: Dict[str, Dict], use_external_lib: bool = False
         self._check_config_equal(
             key="peft.quantization.auto_gptq.from_quantized", value=True
         )
-        self.use_external_lib = use_external_lib and importlib.util.find_spec("autogptq") is not None
+        self.use_external_lib = use_external_lib
+
+        if self.use_external_lib:
+            assert importlib.util.find_spec("auto_gptq") is not None, "Unable to use external library, autogptq module not found."
 
     def model_loader(self, model_name: str, **kwargs):
         # guarded imports
@@ -61,9 +64,9 @@ def model_loader(self, model_name: str, **kwargs):
                 QuantLinear,
             )
         else:
-            from gptqmodel import GPTQModel, QuantizeConfig
-            from gptqmodel.utils import Backend
-            from gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import (
+            from .gptqmodel import GPTQModel, QuantizeConfig
+            from .gptqmodel.utils import Backend
+            from .gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import (
                 QuantLinear,
             )
@@ -126,22 +129,21 @@ def model_loader(self, model_name: str, **kwargs):
         # this is a HF method that checks if the low_cpu_mem mode is enabled
         # via HF accelerate
-        if is_fsdp_enabled():
-            if self.use_external_lib:
-                # Local
-                from .autogptq_utils import (  # pylint: disable=import-outside-toplevel
-                    _patch_target_module,
-                    make_sure_no_tensor_in_meta_device,
-                )
-
-                # We patch `make_sure_no_tensor_in_meta_device`
-                # from autogptq to avoid errors on models without bias
-                _patch_target_module(
-                    to_patch="auto_gptq.modeling._utils.make_sure_no_tensor_in_meta_device",
-                    replace_with=make_sure_no_tensor_in_meta_device,
-                    target_module="auto_gptq.modeling._base",
-                )
-            kwargs["low_cpu_mem_usage"] = True
+        if is_fsdp_enabled() and self.use_external_lib:
+            # Local
+            from .autogptq_utils import (  # pylint: disable=import-outside-toplevel
+                _patch_target_module,
+                make_sure_no_tensor_in_meta_device,
+            )
+
+            # We patch `make_sure_no_tensor_in_meta_device`
+            # from autogptq to avoid errors on models without bias
+            _patch_target_module(
+                to_patch="auto_gptq.modeling._utils.make_sure_no_tensor_in_meta_device",
+                replace_with=make_sure_no_tensor_in_meta_device,
+                target_module="auto_gptq.modeling._base",
+            )
+            kwargs["low_cpu_mem_usage"] = True
 
         # NOTE: need to set the device map as below as we want to use AutoGPTQ for training.
         # device_map is for inference only
@@ -253,7 +255,7 @@ def augmentation(
                 replace_module_peft,
             )
         else:
-            from gptqmodel.utils.peft import get_gptq_peft_model
+            from .gptqmodel.utils.peft import get_gptq_peft_model
 
         (peft_config,) = modifiable_args  # unpack modifiable args
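Two fixes land in `framework_plugin_autogptq.py` above. First, the old probe spelled the module `autogptq`; `importlib.util.find_spec("autogptq")` never matches the installed `auto_gptq` package, so the external path could never actually engage. Second, a missing external library now fails loudly instead of silently downgrading the flag. A minimal sketch of the fail-fast pattern, assuming a free-standing helper (`resolve_use_external_lib` is illustrative, not a plugin API):

```python
import importlib.util


def resolve_use_external_lib(use_external_lib: bool) -> bool:
    # Fail fast rather than silently downgrading: if the caller asked for
    # the external auto_gptq package, its absence is an error, not a cue
    # to fall back to the vendored gptqmodel code.
    if use_external_lib:
        assert importlib.util.find_spec("auto_gptq") is not None, (
            "Unable to use external library, auto_gptq module not found."
        )
    return use_external_lib


print(resolve_use_external_lib(False))  # False; no probe needed
```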
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/__init__.py
new file mode 100644
index 00000000..dd205fa8
--- /dev/null
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/__init__.py
@@ -0,0 +1,18 @@
+###############################################################################
+# Adapted from https://github.com/ModelCloud/GPTQModel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+from .models import GPTQModel
+from .quantization import BaseQuantizeConfig, QuantizeConfig
+from .utils import Backend, get_backend
\ No newline at end of file
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/__init__.py
new file mode 100644
index 00000000..ac31ca08
--- /dev/null
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/__init__.py
@@ -0,0 +1,25 @@
+###############################################################################
+# Adapted from https://github.com/ModelCloud/GPTQModel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+from .auto import MODEL_MAP, GPTQModel
+from .base import BaseGPTQModel
+from .dbrx import DbrxGPTQ
+from .dbrx_converted import DbrxConvertedGPTQ
+from .gemma import GemmaGPTQ
+from .gpt_bigcode import GPTBigCodeGPTQ
+from .gpt_neox import GPTNeoXGPTQ
+from .llama import LlamaGPTQ
+from .mistral import MistralGPTQ
+from .mixtral import MixtralGPTQ
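With the package vendored under `fms_acceleration_peft`, the public surface matches upstream GPTQModel; only the import root moves (which is also why the wheel fix above narrows `only-include` to `src/fms_acceleration_peft`). What callers see, mirroring the import used in `tests/test_gptqmodel.py` further down:

```python
# Same names the upstream `gptqmodel` package exports, re-rooted under the
# plugin so the built wheel ships them.
from fms_acceleration_peft.gptqmodel import (
    Backend,
    GPTQModel,
    QuantizeConfig,
    get_backend,
)
```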
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/_const.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/_const.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/_const.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/_const.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/auto.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/auto.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/auto.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/auto.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/base.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/base.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/base.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/base.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/dbrx.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/dbrx.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/dbrx.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/dbrx.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/dbrx_converted.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/dbrx_converted.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/dbrx_converted.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/dbrx_converted.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/gemma.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gemma.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/gemma.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gemma.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/gpt_bigcode.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gpt_bigcode.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/gpt_bigcode.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gpt_bigcode.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/gpt_neox.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gpt_neox.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/gpt_neox.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/gpt_neox.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/llama.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/llama.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/llama.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/llama.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/mistral.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/mistral.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/mistral.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/mistral.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/mixtral.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/mixtral.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/models/mixtral.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/mixtral.py
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/__init__.py
new file mode 100644
index 00000000..feb7a9e6
--- /dev/null
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/__init__.py
@@ -0,0 +1,15 @@
+###############################################################################
+# Adapted from https://github.com/ModelCloud/GPTQModel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
\ No newline at end of file
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/qlinear/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/qlinear/__init__.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/qlinear/__init__.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/qlinear/__init__.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/__init__.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/__init__.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/__init__.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/custom_autotune.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/custom_autotune.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/custom_autotune.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/custom_autotune.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/dequant.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/dequant.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/dequant.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/dequant.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/kernels.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/kernels.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/kernels.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/kernels.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/mixin.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/mixin.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/mixin.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/nn_modules/triton_utils/mixin.py
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/__init__.py
new file mode 100644
index 00000000..377ffb63
--- /dev/null
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/__init__.py
@@ -0,0 +1,19 @@
+###############################################################################
+# Adapted from https://github.com/ModelCloud/GPTQModel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+from .config import (FORMAT, FORMAT_FIELD_CODE, FORMAT_FIELD_JSON,
+                     QUANT_CONFIG_FILENAME, QUANT_METHOD, QUANT_METHOD_FIELD, BaseQuantizeConfig, QuantizeConfig)
+from .gptq import GPTQ
+from .quantizer import Quantizer, quantize
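The `quantization` subpackage above re-exports `QuantizeConfig`, which the tests construct from elided `quant_config_kwargs`. A sketch of typical use, with the caveat that `bits=4, group_size=128` are assumed illustrative values (the standard GPTQ knobs) and the loader call only mirrors the shape visible in `tests/test_gptqmodel.py`, not a confirmed signature:

```python
import torch

from fms_acceleration_peft.gptqmodel import GPTQModel, QuantizeConfig

# Assumed illustrative values; the tests' actual kwargs are not shown here.
quantize_config = QuantizeConfig(bits=4, group_size=128)

model = GPTQModel.from_pretrained(
    "org/model-name",  # hypothetical checkpoint id
    quantize_config=quantize_config,
    torch_dtype=torch.float16,
)
# Per the test comment: calibration examples are dicts whose only key
# can be "input_ids".
# model.quantize(calibration_dataset)
```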
diff --git a/plugins/accelerated-peft/src/gptqmodel/quantization/config.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/config.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/quantization/config.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/config.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/quantization/gptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/gptq.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/quantization/gptq.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/gptq.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/quantization/quantizer.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/quantizer.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/quantization/quantizer.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/quantization/quantizer.py
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/__init__.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/__init__.py
new file mode 100644
index 00000000..668d0859
--- /dev/null
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/__init__.py
@@ -0,0 +1,16 @@
+###############################################################################
+# Adapted from https://github.com/ModelCloud/GPTQModel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+from .backend import Backend, get_backend
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/backend.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/backend.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/utils/backend.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/backend.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/data.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/data.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/utils/data.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/data.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/importer.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/importer.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/utils/importer.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/importer.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/model.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/model.py
similarity index 100%
rename from plugins/accelerated-peft/src/gptqmodel/utils/model.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/model.py
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/peft.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
similarity index 84%
rename from plugins/accelerated-peft/src/gptqmodel/utils/peft.py
rename to plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
index db27fa09..f5b0f9e6 100644
--- a/plugins/accelerated-peft/src/gptqmodel/utils/peft.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
@@ -21,15 +21,13 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 ###############################################################################
-import warnings
-from contextlib import contextmanager
 from typing import List, Optional, Tuple, Union
 
 import torch
 from peft import PeftConfig, PeftModel, PeftType, get_peft_model
 from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING
 from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING
-from peft.tuners.lora import LoraConfig, LoraLayer, LoraModel
+from peft.tuners.lora import LoraConfig, LoraModel
 from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ
 
 from ..models.base import BaseGPTQModel
@@ -100,22 +98,6 @@ def find_all_linear_names(
         results.add(res)
     return list(results)
 
-
-@contextmanager
-def hijack_peft_mappings():
-    PEFT_TYPE_TO_CONFIG_MAPPING[PeftType.LORA] = GPTQLoraConfig
-    PEFT_TYPE_TO_MODEL_MAPPING[PeftType.LORA] = GPTQLoraModel
-
-    try:
-        yield
-    except:
-        PEFT_TYPE_TO_CONFIG_MAPPING[PeftType.LORA] = GPTQLoraConfig
-        PEFT_TYPE_TO_MODEL_MAPPING[PeftType.LORA] = GPTQLoraModel
-        raise
-    finally:
-        PEFT_TYPE_TO_CONFIG_MAPPING[PeftType.LORA] = GPTQLoraConfig
-        PEFT_TYPE_TO_MODEL_MAPPING[PeftType.LORA] = GPTQLoraModel
-
 def get_gptq_peft_model(
     model: BaseGPTQModel,
     peft_config: PeftConfig = None,
@@ -129,6 +111,9 @@ def get_gptq_peft_model(
     if not train_mode and not model_id:
         raise ValueError("model_id(where to load adapters) not specified when in inference mode.")
 
+    PEFT_TYPE_TO_CONFIG_MAPPING[PeftType.LORA] = GPTQLoraConfig
+    PEFT_TYPE_TO_MODEL_MAPPING[PeftType.LORA] = GPTQLoraModel
+
     if train_mode:
         peft_type = peft_config.peft_type
         if not isinstance(peft_type, str):
@@ -139,16 +124,9 @@ def get_gptq_peft_model(
         if peft_type == PeftType.LORA.value and not isinstance(peft_config, GPTQLoraConfig):
             peft_config = GPTQLoraConfig(**peft_config.to_dict())
 
-    with hijack_peft_mappings():
-        try:
-            if train_mode:
-                peft_model = get_peft_model(model.model, peft_config, adapter_name=adapter_name)
-            else:
-                peft_model = PeftModel.from_pretrained(model.model, model_id, adapter_name)
-        except:
-            raise NotImplementedError(
-                f"{model.__class__.__name__} not support {peft_config.peft_type.value} peft type yet."
-            )
+        peft_model = get_peft_model(model.model, peft_config, adapter_name=adapter_name)
+    else:
+        peft_model = PeftModel.from_pretrained(model.model, model_id, adapter_name)
 
     return peft_model
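The deleted `hijack_peft_mappings` context manager never actually restored anything: its `except` and `finally` branches re-assigned the same GPTQ classes they had just installed, and the try/except wrapper hid the real error behind a generic `NotImplementedError`. The patch therefore registers the mappings once, directly, in `get_gptq_peft_model`. A minimal sketch of the registration mechanism with stand-in classes (the real `GPTQLoraConfig`/`GPTQLoraModel` live in this file):

```python
from dataclasses import dataclass

from peft import PeftType
from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING
from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING
from peft.tuners.lora import LoraConfig, LoraModel


@dataclass
class MyLoraConfig(LoraConfig):
    # Stand-in for GPTQLoraConfig: extends the stock LoRA config.
    injected_fused_attention: bool = False  # hypothetical extra field


class MyLoraModel(LoraModel):
    # Stand-in for GPTQLoraModel: would override module creation so LoRA
    # wraps quantized linear layers instead of nn.Linear.
    pass


# After registration, get_peft_model()/PeftModel.from_pretrained() resolve
# PeftType.LORA to the custom classes instead of the stock ones.
PEFT_TYPE_TO_CONFIG_MAPPING[PeftType.LORA] = MyLoraConfig
PEFT_TYPE_TO_MODEL_MAPPING[PeftType.LORA] = MyLoraModel
```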
diff --git a/plugins/accelerated-peft/src/gptqmodel/__init__.py b/plugins/accelerated-peft/src/gptqmodel/__init__.py
deleted file mode 100644
index 2808d505..00000000
--- a/plugins/accelerated-peft/src/gptqmodel/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .models import GPTQModel
-from .quantization import BaseQuantizeConfig, QuantizeConfig
-from .utils import Backend, get_backend
\ No newline at end of file
diff --git a/plugins/accelerated-peft/src/gptqmodel/models/__init__.py b/plugins/accelerated-peft/src/gptqmodel/models/__init__.py
deleted file mode 100644
index 5496b45e..00000000
--- a/plugins/accelerated-peft/src/gptqmodel/models/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .auto import MODEL_MAP, GPTQModel
-from .base import BaseGPTQModel
-from .dbrx import DbrxGPTQ
-from .dbrx_converted import DbrxConvertedGPTQ
-from .gemma import GemmaGPTQ
-from .gpt_bigcode import GPTBigCodeGPTQ
-from .gpt_neox import GPTNeoXGPTQ
-from .llama import LlamaGPTQ
-from .mistral import MistralGPTQ
-from .mixtral import MixtralGPTQ
diff --git a/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/__init__.py b/plugins/accelerated-peft/src/gptqmodel/nn_modules/triton_utils/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/plugins/accelerated-peft/src/gptqmodel/quantization/__init__.py b/plugins/accelerated-peft/src/gptqmodel/quantization/__init__.py
deleted file mode 100644
index ca86e26f..00000000
--- a/plugins/accelerated-peft/src/gptqmodel/quantization/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .config import (FORMAT, FORMAT_FIELD_CODE, FORMAT_FIELD_JSON,
-                     QUANT_CONFIG_FILENAME, QUANT_METHOD, QUANT_METHOD_FIELD, BaseQuantizeConfig, QuantizeConfig)
-from .gptq import GPTQ
-from .quantizer import Quantizer, quantize
diff --git a/plugins/accelerated-peft/src/gptqmodel/utils/__init__.py b/plugins/accelerated-peft/src/gptqmodel/utils/__init__.py
deleted file mode 100644
index 0e79e1d3..00000000
--- a/plugins/accelerated-peft/src/gptqmodel/utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .backend import Backend, get_backend
diff --git a/plugins/accelerated-peft/tests/test_gptqmodel.py b/plugins/accelerated-peft/tests/test_gptqmodel.py
index 84581da1..9157ccf7 100644
--- a/plugins/accelerated-peft/tests/test_gptqmodel.py
+++ b/plugins/accelerated-peft/tests/test_gptqmodel.py
@@ -1,3 +1,20 @@
+# Copyright The IBM Tuning Team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# SPDX-License-Identifier: Apache-2.0
+# https://spdx.dev/learn/handling-license-info/
+
 import pytest  # pylint: disable=import-error
 import torch
 from typing import List
@@ -14,7 +31,7 @@
 
 BS = 1
 SEQLEN = 128
 
-LOSS_TOLERANCE = 1e-3
+LOSS_TOLERANCE = 0.1
 ALLCLOSE_RTOL = 1e-3
 ALLCLOSE_ATOL = 1e-4
@@ -60,12 +77,12 @@ class TrainArgs:
 
 # quantization function to manage the loading and quantizing of pretrained model
 # using external or local autogptq
-def quantize_model(model_name, config, calibration_dataset, quant_config_kwargs, device, use_external_lib=False):
+def quantize_model(model_name, config, calibration_dataset, quant_config_kwargs, device, torch_dtype, use_external_lib=False):
     if use_external_lib:
         from auto_gptq import AutoGPTQForCausalLM as GPTQModel, BaseQuantizeConfig as QuantizeConfig
         quantize_kwargs = {"use_triton": True}
     else:
-        from gptqmodel import GPTQModel, QuantizeConfig
+        from fms_acceleration_peft.gptqmodel import GPTQModel, QuantizeConfig
         quantize_kwargs = {}
 
     quantize_config = QuantizeConfig(
@@ -76,6 +93,7 @@ def quantize_model(model_name, config, calibration_dataset, quant_config_kwargs,
         model_name,
         quantize_config = quantize_config,
         config = config,
+        torch_dtype = getattr(torch, torch_dtype),
     ).to(device)
     # quantize model, the examples should be list of dict whose keys can only be "input_ids"
     model.quantize(calibration_dataset, **quantize_kwargs)
@@ -184,6 +202,7 @@ def test_quantizing_pretrained_model_outputs_match(
         calibration_dataset,
         quant_config_kwargs,
         device,
+        FLOAT16,
         use_external_lib=True
     )
     refactored_model = quantize_model(
@@ -192,6 +211,7 @@
         calibration_dataset,
         quant_config_kwargs,
         device,
+        FLOAT16,
         use_external_lib=False
     )
@@ -228,7 +248,7 @@ def test_quantizing_pretrained_model_outputs_match(
     refactored_logits = refactored_model(input_ids).logits
 
     # Measure the distribution error with KD Loss
-    loss_fn = torch.nn.KLDivLoss(reduction="mean")
+    loss_fn = torch.nn.KLDivLoss(reduction="batchmean")
     # input should be a distribution in the log space
     input = torch.nn.functional.log_softmax(refactored_logits, dim=1)
     # target must be prob distribution
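The reduction switch from `"mean"` to `"batchmean"` is a correctness fix, not a tuning change: per the PyTorch docs, `"mean"` divides the summed pointwise KL by the total number of elements, so the loss shrinks with vocabulary size, while `"batchmean"` divides by the batch size, which matches the mathematical definition of KL divergence. That rescaling is also why `LOSS_TOLERANCE` relaxes from `1e-3` to `0.1`. A small self-contained illustration:

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
batch, vocab = 4, 32000  # illustrative sizes
log_p = F.log_softmax(torch.randn(batch, vocab), dim=-1)  # log-probabilities
q = F.softmax(torch.randn(batch, vocab), dim=-1)          # probabilities

kl_mean = torch.nn.KLDivLoss(reduction="mean")(log_p, q)
kl_batchmean = torch.nn.KLDivLoss(reduction="batchmean")(log_p, q)

# "mean" is just "batchmean" divided by the vocab dimension:
# tiny in absolute terms, and misleading against a fixed tolerance.
assert torch.allclose(kl_mean * vocab, kl_batchmean)
```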