Commit

apply review feedback: reference the trainer's pre-existing clip val
Signed-off-by: Terry Kong <[email protected]>
terrykong committed Jan 21, 2025
1 parent 2a699b7 commit 149f4aa
Showing 10 changed files with 10 additions and 8 deletions.
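
The change is the same in every file below: trainer.gradient_clip_val now resolves through OmegaConf interpolation to the algorithm-specific value that already exists under the trainer section (for example trainer.dpo.gradient_clip_val), so the two settings can no longer drift apart. A minimal sketch of how such an interpolation resolves; this assumes only the omegaconf package, and the 1.0 is illustrative rather than a value from this commit:

from omegaconf import OmegaConf

# Mirrors the shape of gpt_dpo.yaml: the trainer-level clip value is an
# interpolation that points at the dpo-specific key under the same section.
cfg = OmegaConf.create(
    """
    trainer:
      gradient_clip_val: ${trainer.dpo.gradient_clip_val}
      dpo:
        gradient_clip_val: 1.0  # illustrative value, not from this commit
    """
)

# Interpolations are resolved against the config root on access, so the
# trainer-level value always mirrors trainer.dpo.gradient_clip_val.
print(cfg.trainer.gradient_clip_val)  # -> 1.0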
examples/nlp/gpt/conf/gpt_dpo.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.dpo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # dpo specific args
   dpo:
examples/nlp/gpt/conf/gpt_knowledge_distillation.yaml (1 change: 1 addition & 0 deletions)
@@ -5,6 +5,7 @@ trainer:
   devices: 1
   accelerator: gpu
   precision: bf16
+  gradient_clip_val: ${trainer.knowledge_distillation.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   knowledge_distillation:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_kto.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.kto.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # kto specific args
   kto:
examples/nlp/gpt/conf/gpt_ppo_actor.yaml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.ppo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   ppo:
     # How many steps we train warmup the critic for (without training the policy)
examples/nlp/gpt/conf/gpt_ppo_critic.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.ppo.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   ppo:
     port: 5556
examples/nlp/gpt/conf/gpt_reinforce_actor.yaml (1 change: 1 addition & 0 deletions)
@@ -7,6 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
+  gradient_clip_val: ${trainer.reinforce.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   reinforce:

examples/nlp/gpt/conf/gpt_rs_actor.yaml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.rs.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   rs:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_sft.yaml (2 changes: 1 addition & 1 deletion)
@@ -5,7 +5,7 @@ trainer:
   devices: 1
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.sft.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   sft:
     max_epochs: 1
examples/nlp/gpt/conf/gpt_spin.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16-mixed
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.spin.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # spin specific args
   spin:
examples/nlp/gpt/conf/training_rm.yaml (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ trainer:
   devices: 8
   accelerator: gpu
   precision: bf16
-  gradient_clip_val: 0.0 # No need to change. Megatron Core optimizer uses this value
+  gradient_clip_val: ${trainer.rm.gradient_clip_val} # No need to change. Megatron Core optimizer uses this value

   # rm specific args
   rm:
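
Because each trainer.gradient_clip_val is now an interpolation, overriding the algorithm-specific key once is enough; the trainer-level copy follows automatically. A sketch of that behavior using an OmegaConf dotlist merge; the key layout matches the configs above, but the values are illustrative:

from omegaconf import OmegaConf

base = OmegaConf.create(
    """
    trainer:
      gradient_clip_val: ${trainer.sft.gradient_clip_val}
      sft:
        gradient_clip_val: 1.0  # illustrative value, not from this commit
    """
)

# A single override of the source key...
override = OmegaConf.from_dotlist(["trainer.sft.gradient_clip_val=0.5"])
cfg = OmegaConf.merge(base, override)

# ...propagates through the interpolation to the trainer-level value.
print(cfg.trainer.gradient_clip_val)  # -> 0.5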
