-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extracted Subset of AutoGPTQ library into Accelerated-Peft Plugin (#48)
* added gptqmodel to plugin * edited peft header * add package build workflow * add unit tests on extracted autogptq * modify autogptq plugin to support both external and extracted autogptq * addressed additional PR changes * reintroduce support for low_cpu_mem_usage in extracted lib * Use transformers package checking instead of importlib * formatting * linting * add additional entry to requirements.txt * fixed union type backward compatibility with py39 * Fix FOAK dequant for compatibility with local gptq package * add benchmark comparison script * modified comparison script * formatted scripts/ * edited comparison script to detect difference in command args * addressed PR edits * updated benchmarks * Add comment for foak kernel
- Loading branch information
Showing
47 changed files
with
6,012 additions
and
188 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
# decided not to have this as a requirement for now | ||
# fms_acceleration @ git+https://github.com/foundation-model-stack/fms-acceleration.git#subdirectory=plugins/framework | ||
|
||
# put this in here because there is a breaking FSDP api change that | ||
# is fixed after peft > 0.10 | ||
accelerate < 0.29 | ||
# Needs a lower bound due to the `accelerate.load_checkpoint_in_model` function used in gptqmodel | ||
accelerate >= 0.29 | ||
|
||
# bitsandbytes for the BNB plugin | ||
bitsandbytes | ||
|
||
# Installing from repository because "auto_gptq > 0.7.1" is not yet available | ||
# Specifying the commit id here as recent commits to the main branch have introduced additional dependencies | ||
auto_gptq @ git+https://github.com/AutoGPTQ/AutoGPTQ.git@ea829c7bbe83561c2b1de26795b6592992373ef7 | ||
# Used to manage the thread limit in functions for converting old | ||
# GPTQ models to new GPTQ model format that support symmetrical=False | ||
# https://github.com/AutoGPTQ/AutoGPTQ/pull/640 | ||
threadpoolctl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
############################################################################### | ||
# Adapted from https://github.com/ModelCloud/GPTQModel | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
############################################################################### | ||
# Local | ||
from .models import GPTQModel | ||
from .quantization import BaseQuantizeConfig, QuantizeConfig | ||
from .utils import Backend, get_backend |
26 changes: 26 additions & 0 deletions
26
plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
############################################################################### | ||
# Adapted from https://github.com/ModelCloud/GPTQModel | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
############################################################################### | ||
# Local | ||
from .auto import MODEL_MAP, GPTQModel | ||
from .base import BaseGPTQModel | ||
from .dbrx import DbrxGPTQ | ||
from .dbrx_converted import DbrxConvertedGPTQ | ||
from .gemma import GemmaGPTQ | ||
from .gpt_bigcode import GPTBigCodeGPTQ | ||
from .gpt_neox import GPTNeoXGPTQ | ||
from .llama import LlamaGPTQ | ||
from .mistral import MistralGPTQ | ||
from .mixtral import MixtralGPTQ |
Oops, something went wrong.