Commit

apply review feedback: reference the trainer's pre-existing clip val
Signed-off-by: Terry Kong <[email protected]>
terrykong committed Jan 21, 2025
1 parent 2a699b7 commit 149f4aa
Showing 10 changed files with 10 additions and 8 deletions.
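
The change is the same in every file below: trainer.gradient_clip_val now resolves through OmegaConf interpolation to the algorithm-specific value that already exists under the trainer section (for example trainer.dpo.gradient_clip_val), so the two settings can no longer drift apart. A minimal sketch of how such an interpolation resolves; this assumes only the omegaconf package, and the 1.0 is illustrative rather than a value from this commit:

from omegaconf import OmegaConf

# Mirrors the shape of gpt_dpo.yaml: the trainer-level clip value is an
# interpolation that points at the dpo-specific key under the same section.
cfg = OmegaConf.create(
    """
    trainer:
      gradient_clip_val: ${trainer.dpo.gradient_clip_val}
      dpo:
        gradient_clip_val: 1.0  # illustrative value, not from this commit
    """
)

# Interpolations are resolved against the config root on access, so the
# trainer-level value always mirrors trainer.dpo.gradient_clip_val.
print(cfg.trainer.gradient_clip_val)  # -> 1.0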
examples/nlp/gpt/conf/gpt_dpo.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.dpo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # dpo specific args
   dpo:
examples/nlp/gpt/conf/gpt_knowledge_distillation.yaml (1 change: 1 addition & 0 deletions)
@@ -5,6 +5,7 @@ trainer:
   devices: 1
   accelerator: gpu
   precision: bf16
+  gradient_clip_val: ${trainer.knowledge_distillation.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   knowledge_distillation:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_kto.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.kto.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # kto specific args
   kto:
examples/nlp/gpt/conf/gpt_ppo_actor.yaml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.ppo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   ppo:
     # How many steps we train warmup the critic for (without training the policy)
examples/nlp/gpt/conf/gpt_ppo_critic.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.ppo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   ppo:
     port: 5556
examples/nlp/gpt/conf/gpt_reinforce_actor.yaml (1 change: 1 addition & 0 deletions)
@@ -7,6 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
+  gradient_clip_val: ${trainer.reinforce.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   reinforce:

examples/nlp/gpt/conf/gpt_rs_actor.yaml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.rs.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   rs:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_sft.yaml (2 changes: 1 addition & 1 deletion)
@@ -5,7 +5,7 @@ trainer:
   devices: 1
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.sft.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   sft:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_spin.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16-mixed
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.spin.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # spin specific args
   spin:
examples/nlp/gpt/conf/training_rm.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.rm.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # rm specific args
   rm:
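
Because each trainer.gradient_clip_val is now an interpolation, overriding the algorithm-specific key once is enough; the trainer-level copy follows automatically. A sketch of that behavior using an OmegaConf dotlist merge; the key layout matches the configs above, but the values are illustrative:

from omegaconf import OmegaConf

base = OmegaConf.create(
    """
    trainer:
      gradient_clip_val: ${trainer.sft.gradient_clip_val}
      sft:
        gradient_clip_val: 1.0  # illustrative value, not from this commit
    """
)

# A single override of the source key...
override = OmegaConf.from_dotlist(["trainer.sft.gradient_clip_val=0.5"])
cfg = OmegaConf.merge(base, override)

# ...propagates through the interpolation to the trainer-level value.
print(cfg.trainer.gradient_clip_val)  # -> 0.5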
