ModelTC · helloyongyang · Nov 23, 2024 · Nov 23, 2024
diff --git a/configs/quantization/methods/Awq/awq_w_only_vlm.yml b/configs/quantization/methods/Awq/awq_w_only_vlm.yml
@@ -0,0 +1,46 @@
+base:
+    seed: &seed 42
+model:
+    type: model_type
+    path: model path
+    tokenizer_mode: slow
+    torch_dtype: auto
+calib:
+    name: vlm_datastes
+    type: img_txt
+    download: False
+    path: calib data path
+    n_samples: 128
+    bs: -1
+    seq_len: 512
+    preproc: vlm_general
+    padding: True
+    seed: *seed
+eval:
+    eval_pos: [pretrain, fake_quant]
+    type: img_txt
+    name: MME
+    download: False
+    path: MME dataset path
+    bs: 16
+    inference_per_block: False
+quant:
+    method: Awq
+    quant_objects: [vision, language] # default is [language]
+    weight:
+        bit: 4
+        symmetric: False
+        granularity: per_group
+        group_size: 128
+    special:
+        trans: True
+        # Valid values for "trans_version" are "v1" and "v2";
+        # their results do not differ significantly.
+        trans_version: v2
+        weight_clip: True
+        # For 2-bit quantization, setting "clip_sym: False" will yield better results.
+        clip_sym: True
+save:
+    save_trans: False
+    save_fake: False
+    save_path: /path/to/save/
diff --git a/llmc/models/internvl2.py b/llmc/models/internvl2.py
@@ -252,5 +252,6 @@ def get_vision_subsets_in_block(self, block):
                 'inspect': block.mlp.fc2,
                 'has_kwargs': False,
                 'is_mlp': True,
+                'do_trans': False
             },
         ]
diff --git a/llmc/models/llava.py b/llmc/models/llava.py
@@ -124,5 +124,6 @@ def get_vision_subsets_in_block(self, block):
                 'inspect': block.mlp.fc2,
                 'has_kwargs': False,
                 'is_mlp': True,
+                'do_trans': False
             },
         ]
diff --git a/llmc/models/qwen2vl.py b/llmc/models/qwen2vl.py
@@ -154,6 +154,7 @@ def get_vision_subsets_in_block(self, block):
                 'inspect': block.mlp.fc2,
                 'has_kwargs': False,
                 'is_mlp': True,
+                'do_trans': False
             },
         ]