Skip to content

Commit

Permalink
undo yaml changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
dakinggg committed Sep 29, 2023
1 parent 7cffafe commit aa324c6
Showing 1 changed file with 9 additions and 19 deletions.
28 changes: 9 additions & 19 deletions scripts/train/yamls/pretrain/mpt-125m.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ max_seq_len: 2048
global_seed: 17

# Run Name
run_name: test-mlflow-register-3
run_name: # If left blank, will be read from env var $RUN_NAME

# Model
model:
Expand All @@ -31,7 +31,7 @@ train_loader:
dataset:
local: ${data_local}
remote: ${data_remote}
split: train_small
split: train
shuffle: true
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
Expand All @@ -43,7 +43,7 @@ eval_loader:
dataset:
local: ${data_local}
remote: ${data_remote}
split: val_small
split: val
shuffle: false
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
Expand All @@ -70,16 +70,16 @@ algorithms:
clipping_type: norm
clipping_threshold: 1.0

max_duration: 10ba # ~ 2.5B tokens
max_duration: 4800ba # ~ 2.5B tokens
eval_interval: 500ba
eval_first: false
eval_subset_num_batches: 2
global_train_batch_size: 2
eval_subset_num_batches: -1
global_train_batch_size: 256

# System
seed: ${global_seed}
device_eval_batch_size: 1
device_train_microbatch_size: 1
device_eval_batch_size: 16
device_train_microbatch_size: 16
# device_train_microbatch_size: auto
precision: amp_bf16

Expand All @@ -104,24 +104,14 @@ callbacks:
lr_monitor: {}
memory_monitor: {}
runtime_estimator: {}
hf_checkpointer:
save_interval: 10ba
precision: bfloat16
save_folder: ./{run_name}/checkpoints
log_to_mlflow: true
uc_prefix: main.danielking

loggers:
mlflow:
experiment_name: /Users/[email protected]/mlflow-logging-test

# loggers:
# wandb: {}

# Checkpoint to local filesystem or remote object store
# save_interval: 500ba
# save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
save_folder: ./{run_name}/checkpoints
# save_folder: ./{run_name}/checkpoints
# save_folder: s3://my-bucket/my-folder/{run_name}/checkpoints

# Load from local filesystem or remote object store
Expand Down

0 comments on commit aa324c6

Please sign in to comment.