# env_driver.py -- drives a Unity ML-Agents environment with random actions for 10 episodes
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (
EngineConfigurationChannel,
)
from mlagents_envs.exception import (
UnityEnvironmentException,
UnityCommunicationException,
UnityCommunicatorStoppedException,
)
import numpy as np
# Name of the Unity environment binary to launch
ENV_NAME = "./rl_env_binary/Windows_build/Learning-Agents--r1"
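# Side channel to configure the Unity player at launch
# (window resolution and simulation time_scale)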
engine_config_channel = EngineConfigurationChannel()
engine_config_channel.set_configuration_parameters(
width=1800, height=900, time_scale=1.0
)
env = UnityEnvironment(
file_name=ENV_NAME, seed=1, side_channels=[engine_config_channel]
)
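# Constructing UnityEnvironment launches the binary and blocks until the
# Unity side has completed the handshake.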
env.reset()  # Behavior names only become available after the first reset
# Grab the first registered behavior (the old "brain" concept)
behavior_name = env.get_behavior_names()[0]
behavior_spec = env.get_behavior_spec(behavior_name)
n_actions = behavior_spec.action_size # => 2
state_dims = np.sum(behavior_spec.observation_shapes) # total obs => 54
# --- Env Spec ---
if behavior_spec.is_action_continuous():
    print("Action space is CONTINUOUS, e.g. {0.0, 0.1, 0.2}")
else:
    print("Action space is DISCRETE, e.g. {0, 1, 2}")
print(behavior_spec.discrete_action_branches)
print("\nbehavior_spec.observation_shapes :: ", end="")
print(behavior_spec.observation_shapes) # => [(52,), (2,)]
# ----------------------------------------------------------------------
# Get the state/obs of an agent
step_result = env.get_steps(behavior_name)  # (DecisionSteps, TerminalSteps)
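# get_steps() returns a pair:
#   step_result[0] (DecisionSteps) -> agents that need an action this step
#   step_result[1] (TerminalSteps) -> agents whose episode just ended,
#     either by reaching a terminal state or by hitting max_step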
# Examine the observations returned for the agents in the decision step
print("\nAgent observation: \n{}\n".format(step_result[0].obs))
# => [shape(1, 52), shape(1, 2)]
# There are two observation vectors: ray-cast values and velocity values
for obs in step_result[0].obs:
print(obs.shape)
print(step_result[0].__dict__)
print(step_result[1].__dict__)  # TerminalSteps: only populated when an episode ends
try:
for episode in range(10): # running for 10 episodes.
print("Starting with a new episode...\n\n")
env.reset()
step_result = env.get_steps(behavior_name)
done = False
episode_rewards = 0
end_episode_rewards = 0
# i = 0;
        while not done:  # run one episode (the env caps it at 5000 max_steps)
n_agents = len(step_result[0])
            # This behavior is continuous, so sample random actions in [-1, 1]
action = np.random.randn(n_agents, n_actions)
action = np.clip(action, -1, 1)
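            # For a DISCRETE behavior one would instead sample one integer per
            # action branch (a hedged sketch; this env's actions are continuous,
            # so the lines below are illustrative only):
            # action = np.column_stack(
            #     [np.random.randint(0, b, size=n_agents)
            #      for b in behavior_spec.discrete_action_branches]
            # )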
print(action)
env.set_actions(behavior_name, action)
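            # step() advances the simulation until the agents request their
            # next decision (several engine frames may pass in between).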
env.step()
step_result = env.get_steps(behavior_name)
            episode_rewards += (
                step_result[0].reward[0] if len(step_result[0]) else 0
            )
            end_episode_rewards += (
                step_result[1].reward[0] if len(step_result[1]) else 0
            )
            # An agent in TerminalSteps means the episode ended (terminal
            # state or max_step reached), so stop this episode's loop.
            done = len(step_result[1]) > 0
# i += 1
        print(
            "\n\nTotal reward in this episode: {} (terminal-step reward: {})".format(
                episode_rewards, end_episode_rewards
            )
        )
        # print(i)  # prints 1000 when the 5000-step cap is hit (a decision is requested every 5 engine steps)
except (
KeyboardInterrupt,
UnityCommunicationException,
UnityEnvironmentException,
UnityCommunicatorStoppedException,
) as ex:
print("-" * 100)
print("Exception has occured !!")
print("Testing of env was interrupted.")
print("-" * 100)
finally:
print("Closing the env")
env.close()