Update wandb & WIP effective action space (#15)
* Update: wandb (run_bc_from_scratch, run_sb3_ppo)

---------

Co-authored-by: KevinJeon <[email protected]>
KKGB and KevinJeon committed Oct 22, 2024
1 parent c20a363 commit e03d5c2
Showing 2 changed files with 63 additions and 66 deletions.
19 changes: 2 additions & 17 deletions baselines/ippo/run_sb3_ppo.py
@@ -34,23 +34,8 @@ def func(progress_remaining: float) -> float:
     return func


-def train(exp_config: ExperimentConfig, scene_config: SceneConfig, action_type: str = "discrete"):
+def train(env_config: EnvConfig, exp_config: ExperimentConfig, scene_config: SceneConfig, action_type: str = "discrete"):
     """Run PPO training with stable-baselines3."""

-    # CONFIG
-    env_config = EnvConfig(
-        dynamics_model="classic",
-        dx=torch.round(
-            torch.linspace(-6.0, 6.0, 20), decimals=3
-        ),
-        dy=torch.round(
-            torch.linspace(-6.0, 6.0, 20), decimals=3
-        ),
-        dyaw=torch.round(
-            torch.linspace(-np.pi, np.pi, 20), decimals=3
-        ),
-    )
-
     # MAKE SB3-COMPATIBLE ENVIRONMENT
     env = SB3MultiAgentEnv(
         config=env_config,
@@ -148,4 +133,4 @@ def train(exp_config: ExperimentConfig, scene_config: SceneConfig, action_type:
         k_unique_scenes=exp_config.k_unique_scenes,
     )

-    train(exp_config, scene_config, action_type="discrete")
+    train(env_config, exp_config, scene_config, action_type="discrete")
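
With this commit, `train` no longer builds its own `EnvConfig`; the caller constructs the action grid and passes it in. A minimal sketch of the new call site, assuming the same 20-bin grids the deleted block used (`exp_config` and `scene_config` are assumed to be built as before and are elided here):

    import numpy as np
    import torch

    from pygpudrive.env.config import EnvConfig

    # Same 20 x 20 x 20 grid over (dx, dy, dyaw) that the removed
    # in-function block hard-coded; the caller now owns it.
    env_config = EnvConfig(
        dynamics_model="classic",
        dx=torch.round(torch.linspace(-6.0, 6.0, 20), decimals=3),
        dy=torch.round(torch.linspace(-6.0, 6.0, 20), decimals=3),
        dyaw=torch.round(torch.linspace(-np.pi, np.pi, 20), decimals=3),
    )

    # exp_config and scene_config are constructed as before (elided); the
    # joint discrete action space has 20 * 20 * 20 = 8,000 (dx, dy, dyaw) triples.
    train(env_config, exp_config, scene_config, action_type="discrete")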
110 changes: 61 additions & 49 deletions pygpudrive/env/action_bounds.py
@@ -3,65 +3,70 @@
 import os
 import pickle
 import re
+from bisect import bisect_right

 from pygpudrive.env.config import EnvConfig, SceneConfig, SelectionDiscipline
 from pygpudrive.env.env_torch import GPUDriveTorchEnv

-def update_bounds(scene_config, expert_path):
+def set_effective_action_space(env_config, scene_config, expert_path, std_factor=1.0):
+    """Set the effective action space based on the expert demonstrations."""
     num_scenes = scene_config.num_scenes
     discipline = scene_config.discipline

-    max_bound = []
-    min_bound = []
-
-    # find start index
-    if discipline == SelectionDiscipline.RANGE_N:
-        start_idx = scene_config.start_idx
-    elif discipline == SelectionDiscipline.RANDOM_N:
-        raise NotImplementedError("Random discipline is not supported.")
-    else:
-        start_idx = 0
-
-    # find max and min bounds in num_scenes
-    for num_scene in range(num_scenes):
-        file_idx = start_idx + num_scene
+    if num_scenes == 1 and discipline == SelectionDiscipline.RANGE_N:
+        idx = scene_config.start_idx

         # sort expert files by index
         expert_list = os.listdir(expert_path)
         expert_list = sorted(expert_list, key=lambda x: int(re.search(r'\d+', x).group()))

-        # find the expert file that contains the scene
-        for i in range(len(expert_list) - 1):
-            first_file_idx = int(expert_list[i].split('_')[-1].split('.')[0])
-            second_file_idx = int(expert_list[i+1].split('_')[-1].split('.')[0])
-            if first_file_idx <= file_idx < second_file_idx:
-                local_file_num = file_idx - first_file_idx
-
-                # pickle load
-                with open(os.path.join(expert_path, expert_list[i]), 'rb') as f:
-                    expert_list = pickle.load(f)
-                expert_scene = expert_list[local_file_num]
-                max_bound.append(expert_scene.max(dim=0).values)
-                min_bound.append(expert_scene.min(dim=0).values)
-                break
-
-    max_delta = torch.stack(max_bound).mean(dim=0)
-    min_delta = torch.stack(min_bound).mean(dim=0)
-    return max_delta, min_delta
+        file_indices = [int(file.split('_')[-1].split('.')[0]) for file in expert_list]
+        file_pos = bisect_right(file_indices, idx) - 1
+        pickle_idx = file_indices[file_pos]
+        local_idx = idx - pickle_idx
+
+        with open(os.path.join(expert_path, expert_list[file_pos]), 'rb') as f:
+            expert_data = pickle.load(f)
+        expert_actions = expert_data[local_idx]
+        means = expert_actions.mean(dim=0).tolist()
+        stds = expert_actions.std(dim=0).tolist()
+
+        config = EnvConfig(
+            dynamics_model=env_config.dynamics_model,
+            dx=torch.round(
+                torch.linspace(
+                    means[0] - std_factor * stds[0],
+                    means[0] + std_factor * stds[0], 100),
+                decimals=3),
+            dy=torch.round(
+                torch.linspace(
+                    means[1] - std_factor * stds[1],
+                    means[1] + std_factor * stds[1], 100),
+                decimals=3),
+            dyaw=torch.round(
+                torch.linspace(
+                    means[2] - std_factor * stds[2],
+                    means[2] + std_factor * stds[2], 300),
+                decimals=3)
+        )
+        return config
+
+    else:
+        return env_config


 if __name__ == "__main__":
     import argparse
     from algorithms.il.data_generation import generate_state_action_pairs
     parser = argparse.ArgumentParser('Select the dynamics model that you use')
     parser.add_argument('--dynamics-model', '-dm', type=str, default='delta_local', choices=['delta_local', 'bicycle', 'classic'],)
-    parser.add_argument('--device', '-d', type=str, default='cuda', choices=['cpu', 'cuda'],)
+    parser.add_argument('--device', '-d', type=str, default='cpu', choices=['cpu', 'cuda'],)
     parser.add_argument('--load-path', '-sp', type=str, default='/data/train_actions_pickles')
     parser.add_argument('--num_worlds', type=int, default=1)
-    parser.add_argument('--start_idx', type=int, default=101)
+    parser.add_argument('--start_idx', type=int, default=0)
     args = parser.parse_args()

     torch.set_printoptions(precision=3, sci_mode=False)
-    NUM_WORLDS = args.num_worlds
+    NUM_WORLDS = 1
     MAX_NUM_OBJECTS = 128

     # Initialize configurations
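
The new lookup replaces the old pairwise scan over file names with a binary search: each pickle file is named with the global index of its first scene, so the file holding scene `idx` is the last one whose start index is less than or equal to `idx`, which is exactly what `bisect_right(...) - 1` returns. A self-contained sketch of just that lookup, using hypothetical file names:

    from bisect import bisect_right

    # Pickle files named by the global index of their first scene (hypothetical names).
    expert_list = ["actions_0.pkl", "actions_500.pkl", "actions_1000.pkl"]
    file_indices = [int(f.split('_')[-1].split('.')[0]) for f in expert_list]  # [0, 500, 1000]

    idx = 730  # global scene index we want
    file_pos = bisect_right(file_indices, idx) - 1  # -> 1: last start index <= 730
    pickle_idx = file_indices[file_pos]             # -> 500
    local_idx = idx - pickle_idx                    # -> 230: offset inside that pickle

    assert expert_list[file_pos] == "actions_500.pkl" and local_idx == 230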
@@ -70,21 +75,10 @@ def update_bounds(scene_config, expert_path):
         start_idx=args.start_idx,
         discipline=SelectionDiscipline.RANGE_N)

-    max_delta, min_delta = update_bounds(scene_config, args.load_path)
-
-    print(f"max dx (mean per {args.num_worlds} scenes) : ", max_delta[0].item())
-    print(f"min dx (mean per {args.num_worlds} scenes) : ", min_delta[0].item())
-    print(f"max dy (mean per {args.num_worlds} scenes) : ", max_delta[1].item())
-    print(f"min dy (mean per {args.num_worlds} scenes) : ", min_delta[1].item())
-    print(f"max dyaw (mean per {args.num_worlds} scenes) : ", max_delta[2].item())
-    print(f"min dyaw (mean per {args.num_worlds} scenes) : ", min_delta[2].item())
-
-    env_config = EnvConfig(
-        dynamics_model=args.dynamics_model,
-        dx=torch.round(torch.linspace(min_delta[0].item(), max_delta[0].item(), 100), decimals=3),
-        dy=torch.round(torch.linspace(min_delta[1].item(), max_delta[1].item(), 100), decimals=3),
-        dyaw=torch.round(torch.linspace(min_delta[2].item(), max_delta[2].item(), 300), decimals=3),
-    )
+    env_config = set_effective_action_space(env_config, scene_config, args.load_path, 1.0)

     # Initialize environment
     env = GPUDriveTorchEnv(
@@ -96,6 +90,24 @@
         num_stack=5
     )

+    # Generate expert actions and observations
+    (
+        expert_obs,
+        expert_actions,
+        next_expert_obs,
+        expert_dones,
+        goal_rate,
+        collision_rate
+    ) = generate_state_action_pairs(
+        env=env,
+        use_action_indices=False,  # Map action values to joint action index
+        make_video=True,  # Record the trajectories as sanity check
+        render_index=[0, NUM_WORLDS],  # start_idx, end_idx
+        save_path="./",
+        debug_world_idx=0,
+        debug_veh_idx=0,
+    )
+
     env.close()
     del env
     del env_config
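
The effective action space itself comes from expert statistics: for each of the three action dimensions, `set_effective_action_space` discretizes the interval mean ± std_factor · std of the expert deltas (100 bins for dx and dy, 300 for dyaw) instead of the fixed ±6.0 and ±π ranges used before. A minimal standalone sketch of that construction, with `effective_grid` as a hypothetical helper and random data standing in for a real expert trajectory:

    import torch

    def effective_grid(expert_actions: torch.Tensor, std_factor: float = 1.0):
        """Build per-dimension linspaces over mean +/- std_factor * std.

        expert_actions: (T, 3) tensor of expert (dx, dy, dyaw) deltas.
        """
        means = expert_actions.mean(dim=0).tolist()
        stds = expert_actions.std(dim=0).tolist()
        sizes = (100, 100, 300)  # same bin counts as the new EnvConfig above
        return tuple(
            torch.round(
                torch.linspace(m - std_factor * s, m + std_factor * s, n),
                decimals=3,
            )
            for m, s, n in zip(means, stds, sizes)
        )

    # Fake 91-step expert trajectory, for illustration only.
    dx, dy, dyaw = effective_grid(torch.randn(91, 3), std_factor=1.0)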
