-
Notifications
You must be signed in to change notification settings - Fork 10
/
render_test.py
84 lines (68 loc) · 3.34 KB
/
render_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from __future__ import division
from setproctitle import setproctitle as ptitle
import os
import time
import torch
import logging
import numpy as np
import argparse
from tensorboardX import SummaryWriter
from model import build_model
from utils import setup_logger
from player_util import Agent
from environment import create_env
# Command-line interface for the rendering/evaluation script.
parser = argparse.ArgumentParser(description='render')

# Reproducibility and episode budget.
parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)')
parser.add_argument('--test-eps', type=int, default=5, metavar='M', help='testing episode length')

# Environment selection and rollout length.
parser.add_argument('--env', default='simple', metavar='Pose-v0', help='environment to train on (default: Pose-v0|Pose-v1)')
parser.add_argument('--env-steps', type=int, default=20, help='env steps')

# Checkpoint locations for the high-level and low-level policies.
parser.add_argument('--load-model-dir', default=None, help='folder to load trained high-level models from')
parser.add_argument('--load-executor-dir', default=None, help='folder to load trained low-level models from')

# Model architecture knobs.
parser.add_argument('--model', default='single', metavar='M', help='multi-shapleyV|')
parser.add_argument('--lstm-out', type=int, default=32, metavar='LO', help='lstm output size')

# Masking options for ToM/communication and action spaces.
parser.add_argument('--mask', dest='mask', action='store_true', help='mask ToM and communication to those out of range')
parser.add_argument('--mask-actions', dest='mask_actions', action='store_true', help='mask unavailable actions to boost training')

# Device selection. NOTE: nargs='+' means a user-supplied value parses as a
# list of ints, while the default stays the scalar -1 (CPU).
parser.add_argument('--gpu-id', type=int, default=-1, nargs='+', help='GPUs to use [-1 CPU only] (default: -1)')

# Rendering toggles.
parser.add_argument('--render', dest='render', action='store_true', help='render test')
parser.add_argument('--render_save', dest='render_save', action='store_true', help='render save')

# Population sizes; -1 keeps the environment's default setting.
parser.add_argument('--num-agents', type=int, default=-1)  # if -1, then the env will load the default setting
parser.add_argument('--num-targets', type=int, default=-1)  # else, you can assign the number of agents and targets yourself
def render_test(args):
    """Roll out a trained model for ``args.test_eps`` episodes and print
    per-step reward and communication statistics for each episode.

    Args:
        args: parsed CLI namespace (seed, gpu_id, env, load_model_dir,
              test_eps, env_steps, ...) produced by the module-level parser.
    """
    # --gpu-id is declared with nargs='+', so a user-supplied value arrives
    # as a list of ints while the default is the scalar -1; normalize to a
    # single device id (rendering only ever uses one GPU).
    gpu_id = args.gpu_id[0] if isinstance(args.gpu_id, list) else args.gpu_id
    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device('cpu')
    env = create_env(args.env, args)
    env.seed(args.seed)
    player = Agent(None, env, args, None, device)
    player.gpu_id = gpu_id
    player.model = build_model(player.env, args, device).to(device)
    player.model.eval()
    # map_location lets a checkpoint saved on GPU load on the chosen device
    # (including CPU-only hosts).
    saved_state = torch.load(args.load_model_dir, map_location=device)
    player.model.load_state_dict(saved_state['model'], strict=False)
    comm_cnt_list = []
    comm_bit_list = []
    for i_episode in range(args.test_eps):
        player.reset()
        comm_cnt = 0
        comm_bit = 0
        reward_sum_ep = 0
        print(f"Episode:{i_episode}")
        for i_step in range(args.env_steps):
            player.action_test()
            comm_cnt += player.comm_cnt
            comm_bit += player.comm_bit
            reward_sum_ep += player.reward
        # NOTE(review): the lists normalize by env.max_steps but the prints
        # normalize by args.env_steps — confirm which horizon is intended.
        comm_cnt_list.append(comm_cnt / env.max_steps)
        comm_bit_list.append(comm_bit / env.max_steps)
        # reward_sum_ep[0] assumes player.reward is an indexable per-agent
        # array — TODO confirm against Agent.action_test.
        print('reward step', reward_sum_ep[0] / args.env_steps)
        print('comm_edge', comm_cnt.data / args.env_steps)
        print('comm_bandwidth', comm_bit.data / args.env_steps)
    # print(comm_bit_list)
# print(comm_bit_list)
if __name__ == '__main__':
    # Parse the CLI flags and launch the rendering test loop.
    render_test(parser.parse_args())