Skip to content

Commit

Permalink
adding a fix for issue #14
Browse files Browse the repository at this point in the history
  • Loading branch information
BDonnot committed Jun 16, 2020
1 parent d2aece9 commit bb69932
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 70 deletions.
13 changes: 7 additions & 6 deletions l2rpn_baselines/test/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,15 @@ def load_module(self):
return "PandapowerOPFAgent"


class TestPandapowerGeirina(TestImport, unittest.TestCase):
def load_module(self):
return "Geirina"
# because it deactivates the eager mode
# class TestPandapowerGeirina(TestImport, unittest.TestCase):
# def load_module(self):
# return "Geirina"


class TestAsynchronousActorCritic(TestImport, unittest.TestCase):
def load_module(self):
return "AsynchronousActorCritic"
# class TestAsynchronousActorCritic(TestImport, unittest.TestCase):
# def load_module(self):
# return "AsynchronousActorCritic"


if __name__ == "__main__":
Expand Down
86 changes: 65 additions & 21 deletions l2rpn_baselines/test/test_train_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
import warnings
import tempfile

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import grid2op
from l2rpn_baselines.utils import TrainingParam, NNParam
from grid2op.Environment import MultiEnvironment

from l2rpn_baselines.utils import TrainingParam, NNParam, make_multi_env
from l2rpn_baselines.DeepQSimple import train as train_dqn
from l2rpn_baselines.DeepQSimple import evaluate as eval_dqn
from l2rpn_baselines.DuelQSimple import train as train_d3qs
Expand All @@ -32,22 +35,19 @@
from l2rpn_baselines.SliceRDQN import evaluate as eval_srqn
from l2rpn_baselines.SliceRDQN import SliceRDQN_Config as srdqn_cfg

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


class TestDeepQSimple(unittest.TestCase):
def test_train_eval(self):
tp = TrainingParam()
tp.buffer_size = 100
tp.minibatch_size = 8
tp.update_freq = 32
tp.min_observation = 32
tmp_dir = tempfile.mkdtemp()
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
env = grid2op.make("rte_case5_example", test=True)
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
li_attr_obs_X = ["prod_p", "load_p", "rho"]

# neural network architecture
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
Expand Down Expand Up @@ -85,20 +85,67 @@ def test_train_eval(self):
verbose=False,
save_gif=False)

def test_train_eval_multi(self):
tp = TrainingParam()
tp.buffer_size = 100
tp.minibatch_size = 8
tp.update_freq = 32
tp.min_observation = 32
tmp_dir = tempfile.mkdtemp()
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
env_init = grid2op.make("rte_case5_example", test=True)
env = make_multi_env(env_init, 2)

li_attr_obs_X = ["prod_p", "load_p", "rho"]

# neural network architecture
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
sizes = [100, 50, 10] # sizes of each hidden layers
kwargs_archi = {'observation_size': observation_size,
'sizes': sizes,
'activs': ["relu" for _ in sizes], # all relu activation function
"list_attr_obs": li_attr_obs_X}

kwargs_converters = {"all_actions": None,
"set_line_status": False,
"change_bus_vect": True,
"set_topo_vect": False
}
nm_ = "AnneOnymous"
train_dqn(env,
name=nm_,
iterations=100,
save_path=tmp_dir,
load_path=None,
logs_dir=tmp_dir,
training_param=tp,
verbose=False,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)

baseline_2 = eval_dqn(env_init,
name=nm_,
load_path=tmp_dir,
logs_path=tmp_dir,
nb_episode=1,
nb_process=1,
max_steps=30,
verbose=False,
save_gif=False)

class TestDuelQSimple(unittest.TestCase):
def test_train_eval(self):
tp = TrainingParam()
tp.buffer_size = 100
tp.minibatch_size = 8
tp.update_freq = 32
tp.min_observation = 32
tmp_dir = tempfile.mkdtemp()
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
env = grid2op.make("rte_case5_example", test=True)
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
li_attr_obs_X = ["prod_p", "load_p", "rho"]

# neural network architecture
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
Expand Down Expand Up @@ -143,13 +190,12 @@ def test_train_eval(self):
tp.buffer_size = 100
tp.minibatch_size = 8
tp.update_freq = 32
tp.min_observation = 32
tmp_dir = tempfile.mkdtemp()
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
env = grid2op.make("rte_case5_example", test=True)
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
li_attr_obs_X = ["prod_p", "load_p", "rho"]

# neural network architecture
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
Expand Down Expand Up @@ -201,20 +247,15 @@ def test_train_eval(self):
tp.buffer_size = 100
tp.minibatch_size = 8
tp.update_freq = 32
tp.min_observation = 32
tmp_dir = tempfile.mkdtemp()
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
env = grid2op.make("rte_case5_example", test=True)
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]

# neural network architecture
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"]
li_attr_obs_Tau = ["rho", "line_status"]
sizes = [800, 800, 800, 494, 494, 494]
li_attr_obs_X = ["prod_p", "load_p", "rho"]
li_attr_obs_Tau = ["line_status"]
sizes = [100, 50, 10]

x_dim = NNParam.get_obs_size(env, li_attr_obs_X)
tau_dims = [NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau]
Expand Down Expand Up @@ -257,6 +298,7 @@ def test_train_eval(self):
verbose=False,
save_gif=False)


class TestD3QN(unittest.TestCase):
def test_train_eval(self):
tmp_dir = tempfile.mkdtemp()
Expand Down Expand Up @@ -294,6 +336,7 @@ def test_train_eval(self):

assert eval_res is not None


class TestRDQN(unittest.TestCase):
def test_train_eval(self):
tmp_dir = tempfile.mkdtemp()
Expand Down Expand Up @@ -329,6 +372,7 @@ def test_train_eval(self):

assert eval_res is not None


class TestSRDQN(unittest.TestCase):
def test_train_eval(self):
tmp_dir = tempfile.mkdtemp()
Expand Down
98 changes: 56 additions & 42 deletions l2rpn_baselines/utils/DeepQAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from tqdm import tqdm
import tensorflow as tf

import grid2op
from grid2op.Exceptions import Grid2OpException
from grid2op.Agent import AgentWithConverter
from grid2op.Converter import IdToAct
Expand Down Expand Up @@ -111,7 +112,6 @@ def __init__(self,
name="DeepQAgent",
store_action=True,
istraining=False,
nb_env=1,
filter_action_fun=None,
verbose=False,
**kwargs_converters):
Expand All @@ -122,7 +122,7 @@ def __init__(self,

# and now back to the origin implementation
self.replay_buffer = None
self.__nb_env = nb_env
self.__nb_env = None

self.deep_q = None
self._training_param = None
Expand Down Expand Up @@ -306,8 +306,6 @@ def load(self, path):
conv_path = os.path.join(tmp_me, "{}.npy".format(nm_attr))
if os.path.exists(conv_path):
setattr(self, nm_attr, np.load(file=conv_path))
else:
raise RuntimeError("Impossible to find the data \"{}.npy\" at \"{}\"".format(nm_attr, tmp_me))

def save(self, path):
"""
Expand Down Expand Up @@ -336,7 +334,9 @@ def save(self, path):
# TODO save the "oversampling" part, and all the other info
for nm_attr in ["_time_step_lived", "_nb_chosen", "_proba"]:
conv_path = os.path.join(tmp_me, "{}.npy".format(nm_attr))
np.save(arr=getattr(self, nm_attr), file=conv_path)
attr_ = getattr(self, nm_attr)
if attr_ is not None:
np.save(arr=attr_, file=conv_path)

def train(self,
env,
Expand Down Expand Up @@ -404,6 +404,14 @@ def train(self,
UPDATE_FREQ = training_param.update_tensorboard_freq # update tensorboard every "UPDATE_FREQ" steps
SAVING_NUM = training_param.save_model_each

if isinstance(env, grid2op.Environment.Environment):
self.__nb_env = 1
else:
import warnings
nb_env = env.nb_env
warnings.warn("Training using {} environments".format(nb_env))
self.__nb_env = nb_env

self.init_obs_extraction(env)

training_step = self._training_param.last_step
Expand Down Expand Up @@ -435,22 +443,24 @@ def train(self,

# for non uniform random sampling of the scenarios
th_size = None
if _CACHE_AVAILABLE_DEEPQAGENT:
if isinstance(env.chronics_handler.real_data, MultifolderWithCache):
th_size = env.chronics_handler.real_data.cache_size
if th_size is None:
th_size = len(env.chronics_handler.real_data.subpaths)

self._prev_obs_num = 0
# number of time step lived per possible scenarios
if self._time_step_lived is None or self._time_step_lived.shape[0] != th_size:
self._time_step_lived = np.zeros(th_size, dtype=np.uint64)
# number of time a given scenario has been played
if self._nb_chosen is None or self._nb_chosen.shape[0] != th_size:
self._nb_chosen = np.zeros(th_size, dtype=np.uint)
# number of time a given scenario has been played
if self._proba is None or self._proba.shape[0] != th_size:
self._proba = np.ones(th_size, dtype=np.float64)
if self.__nb_env == 1:
# TODO make this available for multi env too
if _CACHE_AVAILABLE_DEEPQAGENT:
if isinstance(env.chronics_handler.real_data, MultifolderWithCache):
th_size = env.chronics_handler.real_data.cache_size
if th_size is None:
th_size = len(env.chronics_handler.real_data.subpaths)

# number of time step lived per possible scenarios
if self._time_step_lived is None or self._time_step_lived.shape[0] != th_size:
self._time_step_lived = np.zeros(th_size, dtype=np.uint64)
# number of time a given scenario has been played
if self._nb_chosen is None or self._nb_chosen.shape[0] != th_size:
self._nb_chosen = np.zeros(th_size, dtype=np.uint)
# number of time a given scenario has been played
if self._proba is None or self._proba.shape[0] != th_size:
self._proba = np.ones(th_size, dtype=np.float64)

self._prev_id = 0
# this is for the "limit the episode length" depending on your previous success
Expand Down Expand Up @@ -485,6 +495,7 @@ def train(self,
temp_reward = np.array([temp_reward], dtype=np.float32)
temp_done = np.array([temp_done], dtype=np.bool)
info = [info]

new_state = self._convert_obs_train(temp_observation_obj)
self._updage_illegal_ambiguous(training_step, info)
done, reward, total_reward, alive_frame, epoch_num \
Expand Down Expand Up @@ -673,7 +684,8 @@ def _need_reset(self, env, observation_num, epoch_num, done, new_state):

# update the number of time steps it has live
ts_lived = observation_num - self._prev_obs_num
self._time_step_lived[self._prev_id] += ts_lived
if self._time_step_lived is not None:
self._time_step_lived[self._prev_id] += ts_lived
self._prev_obs_num = observation_num
if self._training_param.oversampling_rate is not None:
# proba = np.sqrt(1. / (self._time_step_lived +1))
Expand All @@ -694,7 +706,8 @@ def _need_reset(self, env, observation_num, epoch_num, done, new_state):
self._prev_id %= self._time_step_lived.shape[0]

env.reset()
self._nb_chosen[self._prev_id] += 1
if self._nb_chosen is not None:
self._nb_chosen[self._prev_id] += 1

# random fast forward between now and next week
if self._training_param.random_sample_datetime_start is not None:
Expand Down Expand Up @@ -783,17 +796,17 @@ def _save_tensorboard(self, step, epoch_num, UPDATE_FREQ, epoch_rewards, epoch_a
# print the top k scenarios the "hardest" (ie chosen the most number of times
if self.verbose:
top_k = 10
array_ = np.argsort(self._nb_chosen)[-top_k:][::-1]
print("hardest scenarios\n{}".format(array_))
print("They have been chosen respectively\n{}".format(self._nb_chosen[array_]))
# print("Associated proba are\n{}".format(self._proba[array_]))
print("The number of timesteps played is\n{}".format(self._time_step_lived[array_]))
print("avg (accross all scenarios) number of timsteps played {}"
"".format(np.mean(self._time_step_lived)))
print("Time alive: {}".format(self._time_step_lived[array_] / (self._nb_chosen[array_] + 1)))
print("Avg time alive: {}".format(np.mean(self._time_step_lived / (self._nb_chosen + 1 ))))
# print("avg (accross all scenarios) proba {}"
# "".format(np.mean(self._proba)))
if self._nb_chosen is not None:
array_ = np.argsort(self._nb_chosen)[-top_k:][::-1]
print("hardest scenarios\n{}".format(array_))
print("They have been chosen respectively\n{}".format(self._nb_chosen[array_]))
# print("Associated proba are\n{}".format(self._proba[array_]))
print("The number of timesteps played is\n{}".format(self._time_step_lived[array_]))
print("avg (accross all scenarios) number of timsteps played {}"
"".format(np.mean(self._time_step_lived)))
print("Time alive: {}".format(self._time_step_lived[array_] / (self._nb_chosen[array_] + 1)))
print("Avg time alive: {}".format(np.mean(self._time_step_lived / (self._nb_chosen + 1 ))))

with self._tf_writer.as_default():
last_alive = epoch_alive[(epoch_num-1)]
last_reward = epoch_rewards[(epoch_num-1)]
Expand Down Expand Up @@ -885,12 +898,13 @@ def _save_tensorboard(self, step, epoch_num, UPDATE_FREQ, epoch_rewards, epoch_a
self.nb_do_nothing = 0
self._nb_updated_act_tensorboard = 0


tf.summary.histogram(
"timestep_lived", self._time_step_lived, step=step_tb, buckets=None,
description="number of time steps lived for all scenarios"
)
tf.summary.histogram(
"nb_chosen", self._nb_chosen, step=step_tb, buckets=None,
description="number of times this scenarios has been played"
)
if self._time_step_lived is not None:
tf.summary.histogram(
"timestep_lived", self._time_step_lived, step=step_tb, buckets=None,
description="number of time steps lived for all scenarios"
)
if self._nb_chosen is not None:
tf.summary.histogram(
"nb_chosen", self._nb_chosen, step=step_tb, buckets=None,
description="number of times this scenarios has been played"
)
2 changes: 1 addition & 1 deletion l2rpn_baselines/utils/TrainingParam.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __init__(self,

self.buffer_size = buffer_size
self.minibatch_size = minibatch_size
self.min_observation = min_observation # 5000
self.min_observation = min_observation
self._final_epsilon = float(final_epsilon) # have on average 1 random action per day of approx 288 timesteps at the end (never kill completely the exploration)
self._initial_epsilon = float(initial_epsilon)
self.step_for_final_epsilon = float(step_for_final_epsilon)
Expand Down
Loading

0 comments on commit bb69932

Please sign in to comment.