Merge pull request #230 from tushar2407/version0.1.6
Version0.1.6
StochasticRomanAgeev authored Jul 12, 2023
2 parents 3166cef + eefec32 commit 5eac0c4
Showing 21 changed files with 1,713 additions and 350 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -170,8 +170,8 @@ model = BaseModel.load("x/distilgpt2_lora_finetuned_alpaca")
- [x] INT4 LLaMA LoRA fine-tuning with INT4 generation
- [x] Support for a `Generic model` wrapper
- [x] Support for `Falcon-7B` model
- [X] INT4 low-precision fine-tuning support
- [ ] Evaluation of LLM models
- [ ] INT4 low-precision fine-tuning support
- [ ] INT3, INT2, INT1 low-precision fine-tuning support
- [ ] Support for Stable Diffusion

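The checklist change above marks INT4 low-precision fine-tuning as shipped. A minimal sketch of how that path would typically be exercised through xturing's `BaseModel` API, assuming `llama_lora_kbit` is the model key registered for the new config entry further down in this diff and that `./alpaca_data` is a locally prepared instruction dataset:

```python
from xturing.datasets import InstructionDataset
from xturing.models import BaseModel

# Assumptions: "./alpaca_data" is a prepared instruction dataset on disk and
# "llama_lora_kbit" matches the new entry added to finetuning_config.yaml below.
dataset = InstructionDataset("./alpaca_data")

# Create the low-precision LoRA variant; fine-tuning picks up the defaults
# declared for this key in finetuning_config.yaml (learning rate, batch size, ...).
model = BaseModel.create("llama_lora_kbit")
model.finetune(dataset=dataset)

# Persist the result so it can be reloaded later with BaseModel.load(...).
model.save("./llama_lora_kbit_finetuned")
```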
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "xturing"
version = "0.1.5"
version = "0.1.6"
description = "Fine-tuning, evaluation and data generation for LLMs"

authors = [
2 changes: 1 addition & 1 deletion src/xturing/__about__.py
@@ -1 +1 @@
__version__ = "0.1.5"
__version__ = "0.1.6"
158 changes: 93 additions & 65 deletions src/xturing/config/finetuning_config.yaml
@@ -14,80 +14,87 @@ defaults:
optimizer_name: adamw
output_dir: saved_model

llama:
bloom:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1

llama_lora_int8:
bloom_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256
batch_size: 4

llama_lora_int4:
bloom_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

gptj:
cerebras:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

gptj_lora:
cerebras_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1
batch_size: 4

gptj_lora_int8:
cerebras_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

gpt2:
distilgpt2:
learning_rate: 1e-3
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8

gpt2_lora:
distilgpt2_lora:
learning_rate: 3e-3
weight_decay: 0.01
num_train_epochs: 3
batch_size: 16

gpt2_lora_int8:
learning_rate: 3e-3
falcon:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
batch_size: 16
batch_size: 1
max_length: 256

distilgpt2:
learning_rate: 1e-3
falcon_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1
max_length: 256

falcon_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1

falcon_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

distilgpt2_lora:
learning_rate: 3e-3
falcon_lora_kbit:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 16
batch_size: 8
max_length: 256

galactica:
learning_rate: 5e-5
@@ -108,109 +115,130 @@ galactica_lora_int8:
batch_size: 8
max_length: 256

opt:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3

opt_lora:
generic:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1
batch_size: 8
max_length: 256

opt_lora_int8:
generic_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

cerebras:
learning_rate: 5e-5
generic_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

cerebras_lora:
generic_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 4
batch_size: 8
max_length: 256

cerebras_lora_int8:
generic_lora_kbit:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

bloom:
gptj:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

bloom_lora:
gptj_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 4
batch_size: 1

bloom_lora_int8:
gptj_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

generic:
learning_rate: 1e-4
gpt2:
learning_rate: 1e-3
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

generic_int8:
learning_rate: 1e-4
gpt2_lora:
learning_rate: 3e-3
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256
batch_size: 16

generic_int8_lora:
gpt2_lora_int8:
learning_rate: 3e-3
weight_decay: 0.01
num_train_epochs: 3
batch_size: 16

llama:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256
batch_size: 1

generic_lora:
llama_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
max_length: 256

falcon:
learning_rate: 5e-5
weight_decay: 0.01

llama_lora_kbit:
learning_rate: 3e-4
num_train_epochs: 3
batch_size: 1
max_length: 256
lora_r: 32
lora_alpha: 128
lora_groupsize: 128
lora_dropout: 0.05
seed: 0
cache: False
seqlen: 2048
kl_weight: 1.0
ce_weight: 200.0
save_freq: 1
trainable_kl_weight: False
trainable_ce_weight: False
weight_decay: 1e-5
intra_save_freq: 200
groupsize: 128

falcon_int8:
learning_rate: 1e-4
opt:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1
max_length: 256

falcon_lora:
opt_lora:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 1

falcon_lora_int8:
opt_lora_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 3
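The finetuning_config.yaml rewrite reorders the per-model sections alphabetically, renames the INT4 entries to `*_kbit`, adds Falcon and generic k-bit variants, and gives `llama_lora_kbit` its own quantization-specific knobs (`lora_r`, `lora_alpha`, `groupsize`, KL/CE weights). A minimal sketch of how such per-model sections could be resolved against the shared `defaults` block; the merge logic and the repository-relative path are illustrative assumptions, not code from this PR:

```python
# Sketch (assumption): per-model sections in finetuning_config.yaml are treated
# as overrides layered on top of the shared "defaults" block.
from pathlib import Path

import yaml

config = yaml.safe_load(
    Path("src/xturing/config/finetuning_config.yaml").read_text()
)

def resolve(model_key: str) -> dict:
    """Return the effective fine-tuning settings for one model key."""
    settings = dict(config["defaults"])           # e.g. optimizer_name, output_dir
    settings.update(config.get(model_key, {}))    # per-model overrides
    return settings

print(resolve("llama_lora_kbit"))   # includes lora_r, lora_alpha, groupsize, ...
print(resolve("falcon_lora_int8"))  # learning_rate 1e-4, batch_size 8, max_length 256
```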
