Skip to content

Commit

Permalink
feat(configs): update shell for test diff bsz
Browse files Browse the repository at this point in the history
  • Loading branch information
huangting4201 committed Oct 24, 2023
1 parent 0d3592a commit b4e21fa
Show file tree
Hide file tree
Showing 199 changed files with 24 additions and 35,197 deletions.
11 changes: 5 additions & 6 deletions configs/13B_template.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@

DO_ALERT = False

SEQ_LEN = {seq_len}
JOB_NAME = "13b_train_" + str(SEQ_LEN) + "_" + str({sp}) + "_" + str({checkpoint})
SEQ_LEN = 4096
JOB_NAME = "13b_train_" + str({micro_bsz}) + "_" + str({sp}) + "_" + str({checkpoint})
HIDDEN_SIZE = 5120
NUM_ATTENTION_HEAD = 40
MLP_RATIO = 8 / 3
Expand Down Expand Up @@ -50,9 +49,9 @@
data = dict(
seq_len=SEQ_LEN,
# micro_num means the number of micro_batch contained in one gradient update
micro_num=4,
micro_num=1,
# packed_length = micro_bsz * SEQ_LEN
micro_bsz=2,
micro_bsz={micro_bsz},
# defaults to the value of micro_num
valid_micro_num=4,
# defaults to 0, means disable evaluate
Expand Down Expand Up @@ -126,7 +125,7 @@
)

model = dict(
checkpoint={checkpoint},  # The proportion of layers for activation checkpointing, the optional values are True/False/[0-1]
checkpoint={checkpoint},
num_attention_heads=NUM_ATTENTION_HEAD,
embed_split_hidden=True,
vocab_size=VOCAB_SIZE,
Expand Down
180 changes: 0 additions & 180 deletions configs/13B_train/131072_flash-attn_ckpt_False.py

This file was deleted.

180 changes: 0 additions & 180 deletions configs/13B_train/131072_flash-attn_ckpt_True.py

This file was deleted.

Loading

0 comments on commit b4e21fa

Please sign in to comment.