diff --git a/examples/nlp/gpt/conf/gpt_ppo_actor.yaml b/examples/nlp/gpt/conf/gpt_ppo_actor.yaml
index 4b6439d6f..68edc27ce 100644
--- a/examples/nlp/gpt/conf/gpt_ppo_actor.yaml
+++ b/examples/nlp/gpt/conf/gpt_ppo_actor.yaml
@@ -10,7 +10,7 @@ trainer:
   ppo:
     max_steps: -1  # max PPO steps (-1 to go through the whole train set)
-    val_check_interval: 10 
+    val_check_interval: 10
     save_interval: ${.val_check_interval}
     gradient_clip_val: 1.0
@@ -49,7 +49,7 @@ remote_critic_rm:
   # must match the same flag in the critic config
   combine_rm_and_critic_server: True
-  # reward model server, specify if 
+  # reward model server, specify if
   # combine_rm_and_critic server is False
   reward_model:
     name: reward_model
@@ -57,7 +57,7 @@ remote_critic_rm:
     port: 5555
   critic:
-    name: 
+    name:
       train: critic_train
       infer: critic_infer
       save: critic_save
@@ -146,15 +146,15 @@ model:
   # miscellaneous
   seed: 1234
- 
+
   optim:
     name: distributed_fused_adam
     bucket_cap_mb: 200
     overlap_grad_sync: False
     contiguous_grad_buffer: True
     lr: 9e-7
-    weight_decay: 0.1 
+    weight_decay: 0.1
-    betas: 
+    betas:
     - 0.9
     - 0.98
     sched: