Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into polynet
Browse files Browse the repository at this point in the history
  • Loading branch information
ahottung committed Jun 3, 2024
2 parents 061aa21 + 6b28168 commit 3785590
Show file tree
Hide file tree
Showing 66 changed files with 2,988 additions and 498 deletions.
2 changes: 1 addition & 1 deletion configs/env/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ name: tsp

generator_params:
num_loc: 20
loc_distribution: uniform
loc_distribution: uniform
File renamed without changes.
13 changes: 13 additions & 0 deletions configs/env/fjsp/15j-10m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Flexible Job-Shop Scheduling (FJSP) environment: 15 jobs x 10 machines.
_target_: rl4co.envs.FJSPEnv
name: fjsp

generator_params:
  num_jobs: 15
  num_machines: 10
  # each job consists of 8-12 operations
  min_ops_per_job: 8
  max_ops_per_job: 12
  min_processing_time: 1
  max_processing_time: 20
  # minimum number of machines eligible per operation — TODO confirm against FJSPGenerator
  min_eligible_ma_per_op: 1

data_dir: ${paths.root_dir}/data/fjsp
13 changes: 13 additions & 0 deletions configs/env/fjsp/20j-10m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Flexible Job-Shop Scheduling (FJSP) environment: 20 jobs x 10 machines.
_target_: rl4co.envs.FJSPEnv
name: fjsp

generator_params:
  num_jobs: 20
  num_machines: 10
  # each job consists of 8-12 operations
  min_ops_per_job: 8
  max_ops_per_job: 12
  min_processing_time: 1
  max_processing_time: 20
  # minimum number of machines eligible per operation — TODO confirm against FJSPGenerator
  min_eligible_ma_per_op: 1

data_dir: ${paths.root_dir}/data/fjsp
13 changes: 13 additions & 0 deletions configs/env/fjsp/20j-5m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Flexible Job-Shop Scheduling (FJSP) environment: 20 jobs x 5 machines.
_target_: rl4co.envs.FJSPEnv
name: fjsp

generator_params:
  num_jobs: 20
  num_machines: 5
  # fewer machines -> fewer operations per job than the 10-machine configs
  min_ops_per_job: 4
  max_ops_per_job: 6
  min_processing_time: 1
  max_processing_time: 20
  # minimum number of machines eligible per operation — TODO confirm against FJSPGenerator
  min_eligible_ma_per_op: 1

data_dir: ${paths.root_dir}/data/fjsp
11 changes: 11 additions & 0 deletions configs/env/jssp/10j-10m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Job-Shop Scheduling (JSSP) environment: 10 jobs x 10 machines.
_target_: rl4co.envs.JSSPEnv
name: jssp

generator_params:
  num_jobs: 10
  num_machines: 10
  min_processing_time: 1
  max_processing_time: 99

# evaluation uses the Taillard benchmark instances matching this size
data_dir: ${paths.root_dir}/data/jssp/taillard
test_file: ${env.generator_params.num_jobs}j_${env.generator_params.num_machines}m
11 changes: 11 additions & 0 deletions configs/env/jssp/15j-15m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Job-Shop Scheduling (JSSP) environment: 15 jobs x 15 machines.
_target_: rl4co.envs.JSSPEnv
name: jssp

generator_params:
  num_jobs: 15
  num_machines: 15
  min_processing_time: 1
  max_processing_time: 99

# evaluation uses the Taillard benchmark instances matching this size
data_dir: ${paths.root_dir}/data/jssp/taillard
test_file: ${env.generator_params.num_jobs}j_${env.generator_params.num_machines}m
11 changes: 11 additions & 0 deletions configs/env/jssp/20j-20m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Job-Shop Scheduling (JSSP) environment: 20 jobs x 20 machines.
_target_: rl4co.envs.JSSPEnv
name: jssp

generator_params:
  num_jobs: 20
  num_machines: 20
  min_processing_time: 1
  max_processing_time: 99

# evaluation uses the Taillard benchmark instances matching this size
data_dir: ${paths.root_dir}/data/jssp/taillard
test_file: ${env.generator_params.num_jobs}j_${env.generator_params.num_machines}m
11 changes: 11 additions & 0 deletions configs/env/jssp/6j-6m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Job-Shop Scheduling (JSSP) environment: 6 jobs x 6 machines (smallest size).
_target_: rl4co.envs.JSSPEnv
name: jssp

generator_params:
  num_jobs: 6
  num_machines: 6
  min_processing_time: 1
  max_processing_time: 99

# evaluation uses the Taillard benchmark instances matching this size
data_dir: ${paths.root_dir}/data/jssp/taillard
test_file: ${env.generator_params.num_jobs}j_${env.generator_params.num_machines}m
57 changes: 57 additions & 0 deletions configs/experiment/routing/tsp-stepwise-ppo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# @package _global_
# Experiment: stepwise PPO on TSP (20 locations) with an attention-model
# encoder plugged into the L2D decoder/actor stack.

defaults:
  - override /model: l2d.yaml
  - override /callbacks: default.yaml
  - override /trainer: default.yaml
  - override /logger: wandb.yaml

env:
  _target_: rl4co.envs.TSPEnv4PPO
  generator_params:
    num_loc: 20

logger:
  wandb:
    project: "rl4co"
    tags: ["am-stepwise-ppo", "${env.name}"]
    group: ${env.name}${env.generator_params.num_loc}
    name: ppo-${env.name}${env.generator_params.num_loc}

trainer:
  max_epochs: 10
  precision: 32-true

# shared hyperparameters referenced via interpolation below
embed_dim: 256
num_heads: 8
model:
  _target_: rl4co.models.StepwisePPO
  policy:
    _target_: rl4co.models.L2DPolicy4PPO
    decoder:
      _target_: rl4co.models.zoo.l2d.decoder.L2DDecoder
      env_name: ${env.name}
      embed_dim: ${embed_dim}
      feature_extractor:
        _target_: rl4co.models.zoo.am.encoder.AttentionModelEncoder
        embed_dim: ${embed_dim}
        num_heads: ${num_heads}
        num_layers: 4
        normalization: "batch"
        env_name: "tsp"
      actor:
        _target_: rl4co.models.zoo.l2d.decoder.AttnActor
        embed_dim: ${embed_dim}
        num_heads: ${num_heads}
        env_name: ${env.name}
    embed_dim: ${embed_dim}
    env_name: ${env.name}
    het_emb: False
  batch_size: 512
  mini_batch_size: 512
  train_data_size: 20000
  val_data_size: 1_000
  test_data_size: 1_000
  reward_scale: scale
  optimizer_kwargs:
    lr: 1e-4
23 changes: 23 additions & 0 deletions configs/experiment/scheduling/am-pomo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# @package _global_
# Experiment: POMO with the attention-based L2D policy on scheduling envs.
# Inherits env/trainer/logger settings from scheduling/base.

defaults:
  - scheduling/base

logger:
  wandb:
    tags: ["am-pomo", "${env.name}"]
    name: "am-pomo-${env.name}-${env.generator_params.num_jobs}j-${env.generator_params.num_machines}m"

model:
  _target_: rl4co.models.POMO
  policy:
    _target_: rl4co.models.L2DAttnPolicy
    env_name: ${env.name}
    scaling_factor: ${scaling_factor}
  batch_size: 64
  num_starts: 10
  num_augment: 0
  baseline: "shared"
  metrics:
    val: ["reward", "max_reward"]
    test: ${model.metrics.val}
56 changes: 56 additions & 0 deletions configs/experiment/scheduling/am-ppo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# @package _global_
# Experiment: stepwise PPO with a MatNet-style encoder feeding the L2D
# decoder/actor on scheduling envs. Inherits from scheduling/base.

defaults:
  - scheduling/base

logger:
  wandb:
    tags: ["am-ppo", "${env.name}"]
    name: "am-ppo-${env.name}-${env.generator_params.num_jobs}j-${env.generator_params.num_machines}m"

# shared hyperparameters referenced via interpolation below
embed_dim: 256
num_heads: 8

model:
  _target_: rl4co.models.StepwisePPO
  policy:
    _target_: rl4co.models.L2DPolicy4PPO
    decoder:
      _target_: rl4co.models.zoo.l2d.decoder.L2DDecoder
      env_name: ${env.name}
      embed_dim: ${embed_dim}
      feature_extractor:
        _target_: rl4co.models.zoo.matnet.matnet_w_sa.Encoder
        embed_dim: ${embed_dim}
        num_heads: ${num_heads}
        num_layers: 4
        normalization: "batch"
        init_embedding:
          _target_: rl4co.models.nn.env_embeddings.init.FJSPMatNetInitEmbedding
          embed_dim: ${embed_dim}
          scaling_factor: ${scaling_factor}
      actor:
        _target_: rl4co.models.zoo.l2d.decoder.L2DAttnActor
        embed_dim: ${embed_dim}
        num_heads: ${num_heads}
        env_name: ${env.name}
        scaling_factor: ${scaling_factor}
        stepwise: True
    env_name: ${env.name}
    embed_dim: ${embed_dim}
    scaling_factor: ${scaling_factor}
    het_emb: True
  batch_size: 128
  val_batch_size: 512
  test_batch_size: 64
  # Song et al use 1000 iterations over batches of 20 = 20_000
  # We train 10 epochs on a set of 2000 instance = 20_000
  train_data_size: 2000
  mini_batch_size: 512
  reward_scale: scale
  optimizer_kwargs:
    lr: 1e-4

env:
  stepwise_reward: True
  _torchrl_mode: True
38 changes: 38 additions & 0 deletions configs/experiment/scheduling/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# @package _global_
# Shared base config for all scheduling experiments. Concrete experiments
# (am-pomo, am-ppo, gnn-ppo, hgnn-*) inherit this via `defaults: - scheduling/base`.

defaults:
  - override /model: l2d.yaml
  - override /callbacks: default.yaml
  - override /trainer: default.yaml
  - override /logger: wandb.yaml

logger:
  wandb:
    project: "rl4co"
    log_model: "all"
    group: "${env.name}-${env.generator_params.num_jobs}-${env.generator_params.num_machines}"
    # ??? is OmegaConf's mandatory-missing marker: each experiment must set these
    tags: ???
    name: ???

trainer:
  max_epochs: 10
  # NOTE for some reason l2d is extremely sensitive to precision
  # ONLY USE 32-true for l2d!
  precision: 32-true

seed: 12345678

scaling_factor: 20

model:
  _target_: ???
  batch_size: ???
  train_data_size: 2_000
  val_data_size: 1_000
  test_data_size: 1_000
  optimizer_kwargs:
    lr: 1e-4
    weight_decay: 1e-6
  lr_scheduler: "ExponentialLR"
  lr_scheduler_kwargs:
    gamma: 0.95
35 changes: 35 additions & 0 deletions configs/experiment/scheduling/gnn-ppo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# @package _global_
# Experiment: L2D PPO with a (homogeneous) GNN policy on scheduling envs.
# Inherits from scheduling/base.

defaults:
  - scheduling/base

logger:
  wandb:
    tags: ["gnn-ppo", "${env.name}"]
    name: "gnn-ppo-${env.name}-${env.generator_params.num_jobs}j-${env.generator_params.num_machines}m"

# params from Song et al.
model:
  _target_: rl4co.models.L2DPPOModel
  policy_kwargs:
    embed_dim: 128
    num_encoder_layers: 3
    scaling_factor: ${scaling_factor}
  max_grad_norm: 1
  ppo_epochs: 3
  het_emb: False
  batch_size: 128
  val_batch_size: 512
  test_batch_size: 64
  mini_batch_size: 512
  reward_scale: scale
  optimizer_kwargs:
    lr: 1e-4

trainer:
  max_epochs: 10

env:
  stepwise_reward: True
  _torchrl_mode: True
27 changes: 27 additions & 0 deletions configs/experiment/scheduling/hgnn-pomo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_
# Experiment: POMO with the heterogeneous-GNN L2D policy (het_emb: True)
# on scheduling envs. Inherits from scheduling/base.

defaults:
  - scheduling/base

logger:
  wandb:
    tags: ["hgnn-pomo", "${env.name}"]
    name: "hgnn-pomo-${env.name}-${env.generator_params.num_jobs}j-${env.generator_params.num_machines}m"

model:
  _target_: rl4co.models.POMO
  policy:
    _target_: rl4co.models.L2DPolicy
    env_name: ${env.name}
    embed_dim: 256
    num_encoder_layers: 3
    stepwise_encoding: False
    scaling_factor: ${scaling_factor}
    het_emb: True
  num_starts: 10
  batch_size: 64
  num_augment: 0
  baseline: "shared"
  metrics:
    val: ["reward", "max_reward"]
    test: ${model.metrics.val}
35 changes: 35 additions & 0 deletions configs/experiment/scheduling/hgnn-ppo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# @package _global_
# Experiment: L2D PPO with the heterogeneous-GNN policy (het_emb: True)
# on scheduling envs. Inherits from scheduling/base.

defaults:
  - scheduling/base

logger:
  wandb:
    tags: ["hgnn-ppo", "${env.name}"]
    name: "hgnn-ppo-${env.name}-${env.generator_params.num_jobs}j-${env.generator_params.num_machines}m"

# params from Song et al.
model:
  _target_: rl4co.models.L2DPPOModel
  policy_kwargs:
    embed_dim: 128
    num_encoder_layers: 3
    scaling_factor: ${scaling_factor}
  max_grad_norm: 1
  ppo_epochs: 3
  het_emb: True
  batch_size: 128
  val_batch_size: 512
  test_batch_size: 64
  mini_batch_size: 512
  reward_scale: scale
  optimizer_kwargs:
    lr: 1e-4

trainer:
  max_epochs: 10

env:
  stepwise_reward: True
  _torchrl_mode: True
Loading

0 comments on commit 3785590

Please sign in to comment.