Skip to content

Commit

Permalink
Fix trailing spaces in PPO actor config
Browse files Browse the repository at this point in the history
Signed-off-by: Olivier Delalleau <[email protected]>
  • Loading branch information
odelalleau committed Jan 10, 2024
1 parent ada1fb2 commit 916260f
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions examples/nlp/gpt/conf/gpt_ppo_actor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ trainer:

ppo:
max_steps: -1 # max PPO steps (-1 to go through the whole train set)
val_check_interval: 10
val_check_interval: 10
save_interval: ${.val_check_interval}
gradient_clip_val: 1.0

Expand Down Expand Up @@ -49,15 +49,15 @@ remote_critic_rm:
# must match the same flag in the critic config
combine_rm_and_critic_server: True

# reward model server, specify if
# reward model server, specify if
# combine_rm_and_critic_server is False
reward_model:
name: reward_model
ip: localhost
port: 5555

critic:
name:
name:
train: critic_train
infer: critic_infer
save: critic_save
Expand Down Expand Up @@ -146,15 +146,15 @@ model:

# miscellaneous
seed: 1234

optim:
name: distributed_fused_adam
bucket_cap_mb: 200
overlap_grad_sync: False
contiguous_grad_buffer: True
lr: 9e-7
weight_decay: 0.1
betas:
weight_decay: 0.1
betas:
- 0.9
- 0.98
sched:
Expand Down

0 comments on commit 916260f

Please sign in to comment.