Skip to content

Commit

Permalink
add test
Browse files — browse the repository at this point in the history
  • Loading branch information
yingtongxiong committed Oct 25, 2023
1 parent 41cfa1a commit 0bac166
Show file tree
Hide file tree
Showing 200 changed files with 33 additions and 35,183 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -149,5 +149,9 @@ memory_trace
13b_train*/
30b_train*/
fstp_logs/
configs/7B_train/*
configs/13B_train/*
configs/30B_train/*

atb
pip
8 changes: 4 additions & 4 deletions configs/13B_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
DO_ALERT = False

SEQ_LEN = {seq_len}
JOB_NAME = "13b_train_" + str(SEQ_LEN) + "_" + str({sp}) + "_" + str({checkpoint})
JOB_NAME = "13b_train_" + str(SEQ_LEN) + "_" + str({sp}) + "_" + str({intern_overlap}) + "_" + str({checkpoint})
HIDDEN_SIZE = 5120
NUM_ATTENTION_HEAD = 40
MLP_RATIO = 8 / 3
Expand Down Expand Up @@ -50,9 +50,9 @@
data = dict(
seq_len=SEQ_LEN,
# micro_num means the number of micro_batch contained in one gradient update
micro_num=4,
micro_num=1,
# packed_length = micro_bsz * SEQ_LEN
micro_bsz=2,
micro_bsz=1,
# defaults to the value of micro_num
valid_micro_num=4,
# defaults to 0, means disable evaluate
Expand Down Expand Up @@ -91,7 +91,7 @@
hybrid_zero_optimizer = dict(
# Enable low_level_optimizer overlap_communication
overlap_sync_grad=True,
overlap_sync_param=True,
overlap_sync_param=False,
# bucket size for nccl communication params
reduce_bucket_size=512 * 1024 * 1024,
# grad clipping
Expand Down
180 changes: 0 additions & 180 deletions configs/13B_train/131072_flash-attn_ckpt_False.py

This file was deleted.

Loading

0 comments on commit 0bac166

Please sign in to comment.