Skip to content

Commit

Permalink
implementing some tests and some bugfix for MaskedEnvironment
Browse files Browse the repository at this point in the history
  • Loading branch information
BDonnot committed Jan 11, 2024
1 parent 3c5196f commit f1310c5
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 152 deletions.
21 changes: 15 additions & 6 deletions grid2op/Environment/maskedEnvironment.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,14 @@ class MaskedEnvironment(Environment): # TODO heritage ou alors on met un truc d
.. warning::
At time of writing, the behaviour of "obs.simulate" is not modified
"""
CAN_SKIP_TS = False # some steps can be more than one time steps
# some kind of infinity value
# NB: we multiply np.finfo(dt_float).max by a small number (1e-7) to avoid overflow,
# because _hard_overflow_threshold is multiplied by the flow on the lines
INF_VAL_THM_LIM = 1e-7 * np.finfo(dt_float).max

# some kind of infinity value
INF_VAL_TS_OVERFLOW_ALLOW = np.iinfo(dt_int).max - 1

def __init__(self,
grid2op_env: Union[Environment, dict],
lines_of_interest):
Expand All @@ -38,7 +45,7 @@ def __init__(self,
elif isinstance(grid2op_env, dict):
super().__init__(**grid2op_env)
else:
raise EnvError(f"For TimedOutEnvironment you need to provide "
raise EnvError(f"For MaskedEnvironment you need to provide "
f"either an Environment or a dict "
f"for grid2op_env. You provided: {type(grid2op_env)}")

Expand All @@ -62,10 +69,8 @@ def _make_lines_of_interest(self, lines_of_interest):

def _reset_vectors_and_timings(self):
super()._reset_vectors_and_timings()
self._hard_overflow_threshold[~self._lines_of_interest] = 1e-7 * np.finfo(dt_float).max # some kind of infinity value
# NB we multiply np.finfo(dt_float).max by a small number to avoid overflow
# indeed, _hard_overflow_threshold is multiply by the flow on the lines
self._nb_timestep_overflow_allowed[~self._lines_of_interest] = np.iinfo(dt_int).max - 1 # some kind of infinity value
self._hard_overflow_threshold[~self._lines_of_interest] = type(self).INF_VAL_THM_LIM
self._nb_timestep_overflow_allowed[~self._lines_of_interest] = type(self).INF_VAL_TS_OVERFLOW_ALLOW

def get_kwargs(self, with_backend=True, with_chronics_handler=True):
res = {}
Expand All @@ -79,6 +84,10 @@ def get_params_for_runner(self):
res["other_env_kwargs"] = {"lines_of_interest": copy.deepcopy(self._lines_of_interest)}
return res

def _custom_deepcopy_for_copy(self, new_obj):
    """Copy the state specific to this class onto ``new_obj``.

    The parent implementation is run first; afterwards ``new_obj`` receives
    its own deep copy of ``self._lines_of_interest`` so that the copy does
    not share the mask object with the original.
    """
    super()._custom_deepcopy_for_copy(new_obj)
    mask_copy = copy.deepcopy(self._lines_of_interest)
    new_obj._lines_of_interest = mask_copy

@classmethod
def init_obj_from_kwargs(cls,
other_env_kwargs,
Expand Down
1 change: 1 addition & 0 deletions grid2op/Runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,7 @@ def run(
returned list are not necessarily sorted by this value)
- "cum_reward" the cumulative reward obtained by the :attr:`Runner.Agent` on this episode i
- "nb_time_step": the number of time steps played in this episode.
- "total_step": the total number of time steps possible in this episode.
- "episode_data" : [Optional] The :class:`EpisodeData` corresponding to this episode run only
if `add_detailed_output=True`
- "add_nb_highres_sim": [Optional] The estimated number of calls to high resolution simulator made
Expand Down
283 changes: 137 additions & 146 deletions grid2op/tests/test_MaskedEnvironment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
MultiDiscreteActSpace)


class TestMaskedEnvironment(unittest.TestCase):
def get_mask(self):
class TestMaskedEnvironment(unittest.TestCase):
@staticmethod
def get_mask():
mask = np.full(20, fill_value=False, dtype=bool)
mask[[0, 1, 4, 2, 3, 6, 5]] = True # THT part
return mask
Expand All @@ -30,9 +31,9 @@ def setUp(self) -> None:
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
self.env_in = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__),
lines_of_interest=self.get_mask())
lines_of_interest=TestMaskedEnvironment.get_mask())
self.env_out = MaskedEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__),
lines_of_interest=~self.get_mask())
lines_of_interest=~TestMaskedEnvironment.get_mask())
self.line_id = 3
th_lim = self.env_in.get_thermal_limit() * 2. # avoid all problem in general
th_lim[self.line_id] /= 10. # make sure to get trouble in line 3
Expand All @@ -41,10 +42,11 @@ def setUp(self) -> None:
# env_out: line is out of the area
self.env_out.set_thermal_limit(th_lim)

self._init_env(self.env_in)
self._init_env(self.env_out)

def _init_env(self, env):
TestMaskedEnvironment._init_env(self.env_in)
TestMaskedEnvironment._init_env(self.env_out)

@staticmethod
def _init_env(env):
env.set_id(0)
env.seed(0)
env.reset()
Expand All @@ -69,13 +71,13 @@ def test_ok(self):
obs_out, reward, done, info = self.env_out.step(act)
if i < 2: # 2 : 2 full steps already
assert obs_in.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}"
assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}"
assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}"
else:
# cooldown applied for line 3:
# - it disconnects the line in `self.env_in`
# - it does not affect anything in `self.env_out`
assert not obs_in.line_status[self.line_id]
assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}"
assert obs_out.timestep_overflow[self.line_id] == i + 1, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}"

def test_reset(self):
# timestep_overflow should be 0 initially even if the flow is too high
Expand All @@ -84,155 +86,144 @@ def test_reset(self):
assert obs.rho[self.line_id] > 1.


class TestTimedOutEnvironmentCpy(TestMaskedEnvironment):
class TestMaskedEnvironmentCpy(TestMaskedEnvironment):
def setUp(self) -> None:
super().setUp()
init_int = self.env_in.copy()
init_out = self.env_out.copy()
self.env0 = self.env_in.copy()
self.env1 = self.env_out.copy()
init_int = self.env_in
init_out = self.env_out
self.env_in = self.env_in.copy()
self.env_out = self.env_out.copy()
init_int.close()
init_out.close()


# class TestTOEnvRunner(unittest.TestCase):
# def get_timeout_ms(self):
# return 200

# def setUp(self) -> None:
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore")
# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__),
# time_out_ms=self.get_timeout_ms())
# params = self.env1.parameters
# params.NO_OVERFLOW_DISCONNECTION = True
# self.env1.change_parameters(params)
# self.cum_reward = 645.70208
# self.max_iter = 10
class TestMaskedEnvironmentRunner(unittest.TestCase):
def setUp(self) -> None:
    """Build the two masked environments and set the episode length.

    The environment creation is borrowed from ``TestMaskedEnvironment.setUp``
    (called as a plain function on this instance), which sets ``self.env_in``,
    ``self.env_out`` and ``self.line_id``.
    """
    TestMaskedEnvironment.setUp(self)
    # number of steps each runner episode is limited to
    self.max_iter = 10

# def tearDown(self) -> None:
# self.env1.close()
# return super().tearDown()
def tearDown(self) -> None:
    """Close both masked environments, then run the parent teardown."""
    for masked_env in (self.env_in, self.env_out):
        masked_env.close()
    return super().tearDown()

# def test_runner_can_make(self):
# runner = Runner(**self.env1.get_params_for_runner())
# env2 = runner.init_env()
# assert isinstance(env2, TimedOutEnvironment)
# assert env2.time_out_ms == self.get_timeout_ms()

# def test_runner_noskip(self):
# agent = AgentOK(self.env1)
# runner = Runner(**self.env1.get_params_for_runner(),
# agentClass=None,
# agentInstance=agent)
# res = runner.run(nb_episode=1,
# max_iter=self.max_iter)
# _, _, cum_reward, timestep, max_ts = res[0]
# assert abs(cum_reward - self.cum_reward) <= 1e-5

# def test_runner_skip1(self):
# agent = AgentKO(self.env1)
# runner = Runner(**self.env1.get_params_for_runner(),
# agentClass=None,
# agentInstance=agent)
# res = runner.run(nb_episode=1,
# max_iter=self.max_iter)
# _, _, cum_reward, timestep, max_ts = res[0]
# assert abs(cum_reward - self.cum_reward) <= 1e-5

# def test_runner_skip2(self):
# agent = AgentKO2(self.env1)
# runner = Runner(**self.env1.get_params_for_runner(),
# agentClass=None,
# agentInstance=agent)
# res = runner.run(nb_episode=1,
# max_iter=self.max_iter)
# _, _, cum_reward, timestep, max_ts = res[0]
# assert abs(cum_reward - self.cum_reward) <= 1e-5

# def test_runner_skip2_2ep(self):
# agent = AgentKO2(self.env1)
# runner = Runner(**self.env1.get_params_for_runner(),
# agentClass=None,
# agentInstance=agent)
# res = runner.run(nb_episode=2,
# max_iter=self.max_iter)
# _, _, cum_reward, timestep, max_ts = res[0]
# assert abs(cum_reward - self.cum_reward) <= 1e-5
# _, _, cum_reward, timestep, max_ts = res[1]
# assert abs(cum_reward - 648.90795) <= 1e-5


# class TestTOEnvGym(unittest.TestCase):
# def get_timeout_ms(self):
# return 400.
def test_runner_can_make(self):
    """A Runner built from the env parameters recreates a MaskedEnvironment
    carrying the same ``_lines_of_interest`` mask."""
    runner_kwargs = self.env_in.get_params_for_runner()
    rebuilt_env = Runner(**runner_kwargs).init_env()
    assert isinstance(rebuilt_env, MaskedEnvironment)
    same_mask = rebuilt_env._lines_of_interest == self.env_in._lines_of_interest
    assert same_mask.all()

def test_runner(self):
    """Check aggregated and per-step Runner results for both masks.

    ``self.env_in`` has line ``self.line_id`` among its lines of interest,
    ``self.env_out`` does not. Each runner plays one episode of
    ``self.max_iter`` steps with a fixed seed and episode id. In each result
    tuple, index 2 is the cumulative reward and index 3 the number of steps
    played; with ``add_detailed_output=True`` the last element is the
    episode data.
    """
    # arguments shared by every run, so the three runs stay comparable
    run_kwargs = dict(nb_episode=1,
                      max_iter=self.max_iter,
                      env_seeds=[0],
                      episode_id=[0])

    # create the runners
    runner_in = Runner(**self.env_in.get_params_for_runner())
    runner_out = Runner(**self.env_out.get_params_for_runner())
    res_in, *_ = runner_in.run(add_detailed_output=True, **run_kwargs)
    res_out, *_ = runner_out.run(add_detailed_output=True, **run_kwargs)
    res_in2, *_ = runner_in.run(**run_kwargs)

    # check correct results are obtained when aggregated
    # (compare against self.max_iter instead of repeating the literal)
    assert res_in[3] == self.max_iter
    assert res_in2[3] == self.max_iter
    assert res_out[3] == self.max_iter
    assert np.allclose(res_in[2], 645.4992065)
    assert np.allclose(res_in2[2], 645.4992065)
    assert np.allclose(res_out[2], 645.7020874)

    # check detailed results
    ep_data_in = res_in[-1]
    ep_data_out = res_out[-1]
    # one more observation than steps is inspected (indices 0..max_iter)
    for i in range(self.max_iter + 1):
        obs_in = ep_data_in.observations[i]
        obs_out = ep_data_out.observations[i]
        if i < 3:
            assert obs_in.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_in.timestep_overflow[self.line_id]}"
            assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}"
        else:
            # cooldown applied for line 3:
            # - it disconnects the line in `self.env_in`
            # - it does not affect anything in `self.env_out`
            assert not obs_in.line_status[self.line_id], f"error for step {i}: line is not disconnected"
            assert obs_out.timestep_overflow[self.line_id] == i, f"error for step {i}: {obs_out.timestep_overflow[self.line_id]}"

# def setUp(self) -> None:
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore")
# self.env1 = TimedOutEnvironment(grid2op.make("l2rpn_case14_sandbox", test=True, _add_to_name=type(self).__name__),
# time_out_ms=self.get_timeout_ms())

class TestMaskedEnvironmentGym(unittest.TestCase):
def setUp(self) -> None:
    """Build ``self.env_in`` / ``self.env_out`` by reusing the setup of
    ``TestMaskedEnvironment`` (called as a plain function on this instance)."""
    TestMaskedEnvironment.setUp(self)

# def tearDown(self) -> None:
# self.env1.close()
# return super().tearDown()
def tearDown(self) -> None:
    """Close both masked environments, then run the parent teardown."""
    for masked_env in (self.env_in, self.env_out):
        masked_env.close()
    return super().tearDown()

# def test_gym_with_step(self):
# """test the step function also makes the 'do nothing'"""
# self.skipTest("On docker execution time is too unstable")
# env_gym = GymEnv(self.env1)
# env_gym.reset()

# agentok = AgentOK(env_gym)
# for i in range(10):
# act = agentok.act_gym(None, None, None)
# for k in act:
# act[k][:] = 0
# *_, info = env_gym.step(act)
# assert info["nb_do_nothing"] == 0
# assert info["nb_do_nothing_made"] == 0
# assert env_gym.init_env._nb_dn_last == 0

# env_gym.reset()
# agentko = AgentKO1(env_gym)
# for i in range(10):
# act = agentko.act_gym(None, None, None)
# for k in act:
# act[k][:] = 0
# *_, info = env_gym.step(act)
# assert info["nb_do_nothing"] == 1
# assert info["nb_do_nothing_made"] == 1
# assert env_gym.init_env._nb_dn_last == 1
def _aux_run_envs(self, act, env_gym_in, env_gym_out):
    """Step both gym environments 10 times with ``act`` and check line
    ``self.line_id`` in each returned observation.

    For the first two steps the overflow counter must increase in both
    environments. From the third step on, the line must be disconnected in
    ``env_gym_in`` while the counter keeps increasing in ``env_gym_out``.
    """
    line = self.line_id
    for i in range(10):
        obs_in, _reward, _done, _truncated, _info = env_gym_in.step(act)
        obs_out, _reward, _done, _truncated, _info = env_gym_out.step(act)
        expected = i + 1
        if i >= 2:
            # cooldown applied for line 3: it is disconnected in `env_gym_in`
            assert not obs_in["line_status"][line]
        else:
            # 2 : 2 full steps already
            assert obs_in["timestep_overflow"][line] == expected, f"error for step {i}: {obs_in['timestep_overflow'][line]}"
        # in `env_gym_out` the line is never disconnected: the counter keeps growing
        assert obs_out["timestep_overflow"][line] == expected, f"error for step {i}: {obs_out['timestep_overflow'][line]}"

def test_gym_with_step(self):
    """Stepping through the gym wrapper disconnects (or not) the line,
    both on the first episode and again after a reset."""
    gym_envs = (GymEnv(self.env_in), GymEnv(self.env_out))
    do_nothing = {}
    self._aux_run_envs(do_nothing, *gym_envs)
    for gym_env in gym_envs:
        gym_env.reset()
    self._aux_run_envs(do_nothing, *gym_envs)

# def test_gym_normal(self):
# """test I can create the gym env"""
# env_gym = GymEnv(self.env1)
# env_gym.reset()

# def test_gym_box(self):
# """test I can create the gym env with box ob space and act space"""
# env_gym = GymEnv(self.env1)
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore")
# env_gym.action_space = BoxGymActSpace(self.env1.action_space)
# env_gym.observation_space = BoxGymObsSpace(self.env1.observation_space)
# env_gym.reset()

# def test_gym_discrete(self):
# """test I can create the gym env with discrete act space"""
# env_gym = GymEnv(self.env1)
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore")
# env_gym.action_space = DiscreteActSpace(self.env1.action_space)
# env_gym.reset()
def test_gym_normal(self):
    """The masked environment can be wrapped in a gym env and reset."""
    GymEnv(self.env_in).reset()

def test_gym_box(self):
    """Both gym envs can be created and reset with Box observation and
    action spaces."""
    wrapped = [(GymEnv(self.env_in), self.env_in),
               (GymEnv(self.env_out), self.env_out)]
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for gym_env, masked_env in wrapped:
            gym_env.action_space = BoxGymActSpace(masked_env.action_space)
            gym_env.observation_space = BoxGymObsSpace(masked_env.observation_space)
    for gym_env, _ in wrapped:
        gym_env.reset()

def test_gym_discrete(self):
    """Both gym envs work with a discrete action space: action 0 is
    played at every step and the line behaviour is checked."""
    wrapped = [(GymEnv(self.env_in), self.env_in),
               (GymEnv(self.env_out), self.env_out)]
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for gym_env, masked_env in wrapped:
            gym_env.action_space = DiscreteActSpace(masked_env.action_space)
    for gym_env, _ in wrapped:
        gym_env.reset()
    (gym_in, _), (gym_out, _) = wrapped
    self._aux_run_envs(0, gym_in, gym_out)


# def test_gym_multidiscrete(self):
# """test I can create the gym env with multi discrete act space"""
# env_gym = GymEnv(self.env1)
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore")
# env_gym.action_space = MultiDiscreteActSpace(self.env1.action_space)
# env_gym.reset()
def test_gym_multidiscrete(self):
    """Both gym envs work with a multi discrete action space: an all-zero
    action is played at every step and the line behaviour is checked."""
    wrapped = [(GymEnv(self.env_in), self.env_in),
               (GymEnv(self.env_out), self.env_out)]
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for gym_env, masked_env in wrapped:
            gym_env.action_space = MultiDiscreteActSpace(masked_env.action_space)
    for gym_env, _ in wrapped:
        gym_env.reset()
    (gym_in, _), (gym_out, _) = wrapped
    # sample only to get an action of the right form, then zero every component
    zero_act = gym_in.action_space.sample()
    zero_act[:] = 0
    self._aux_run_envs(zero_act, gym_in, gym_out)


if __name__ == "__main__":
Expand Down

0 comments on commit f1310c5

Please sign in to comment.