diff --git a/examples/qwen/README.md b/examples/qwen/README.md new file mode 100644 index 0000000000..b62fb4101b --- /dev/null +++ b/examples/qwen/README.md @@ -0,0 +1,10 @@ +# Qwen + +TODO + +# Qwen2 MoE + +✅ multipack +✅ qwen2_moe 4-bit QLoRA +✅ qwen2_moe 16-bit LoRA +❓ qwen2_moe 8-bit LoRA diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml new file mode 100644 index 0000000000..c59b282d0a --- /dev/null +++ b/examples/qwen/qwen2-moe-lora.yaml @@ -0,0 +1,64 @@ +base_model: Qwen/Qwen1.5-MoE-A2.7B +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./out + +sequence_len: 1024 # supports up to 32k +sample_packing: false +pad_to_sequence_len: false + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 1 +num_epochs: 4 +optimizer: paged_adamw_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml new file mode 100644 index 0000000000..2242eac0d4 --- /dev/null +++ b/examples/qwen/qwen2-moe-qlora.yaml @@ -0,0 +1,64 @@ +base_model: Qwen/Qwen1.5-MoE-A2.7B +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test 
+ type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./out + +sequence_len: 1024 # supports up to 32k +sample_packing: false +pad_to_sequence_len: false + +adapter: qlora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 1 +num_epochs: 4 +optimizer: paged_adamw_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/requirements.txt b/requirements.txt index b3db07d056..8733885d56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ packaging==23.2 -peft==0.9.0 -transformers @ git+https://github.com/huggingface/transformers.git@73a73b415e36f41481369f6129cb4b62bb127a78 +peft==0.10.0 +transformers @ git+https://github.com/huggingface/transformers.git@43d17c18360ac9c3d3491389328e2fe55fe8f9ce tokenizers==0.15.0 bitsandbytes==0.43.0 accelerate==0.28.0 @@ -39,4 +39,4 @@ s3fs gcsfs # adlfs -trl @ git+https://github.com/huggingface/trl.git@304e208f778a5442c30cdda500348226cdc97d90 +trl @ git+https://github.com/huggingface/trl.git@0ee349dcd43b0f4b3169449f16751c38ac4a609f diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index a8f5e7a84f..c1eb3127d2 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -12,6 +12,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "mixtral", "qwen2", 
+ "qwen2_moe", "falcon", "phi", "gemma", @@ -31,6 +32,10 @@ def patch_for_multipack(model_type, model_name=None): transformers.models.qwen2.modeling_qwen2._get_unpad_data = ( # pylint: disable=protected-access get_unpad_data ) + elif model_type == "qwen2_moe": + transformers.models.qwen2_moe.modeling_qwen2_moe._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) elif model_type == "falcon": transformers.models.falcon.modeling_falcon._get_unpad_data = ( # pylint: disable=protected-access get_unpad_data diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 31686f6006..adf13e3c06 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -456,7 +456,7 @@ def load_model( "bnb_4bit_quant_type": "nf4", "bnb_4bit_quant_storage": torch.bfloat16, } - if cfg.model_config_type == "jamba" and not cfg.deepspeed: + if not cfg.deepspeed: # for some reason, this causes the loss to be off by an order of magnitude # but deepspeed needs this still in bfloat16 bnb_config["bnb_4bit_quant_storage"] = torch.float32