diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 594ea2e..f991d2c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,17 @@ Change Log -------- - stack multiple states in `utils/DeepQAgent` +[0.4.1] - 2020-06-16 +----------------------- +- [FIXED] `Issue 14 `_ clearer interface and get rid + of the "nb_env" in some baseline constructors. A helper function + `make_multi_env` has also been created to help the creation of the appropriate multi environment. +- [FIXED] `Issue 13 `_ the names have been properly updated +- [FIXED] `Issue 12 `_ the appropriate documentation for the + SAC baselines and all the kind +- [FIXED] `Issue 9 `_ no more hard coded global variables for + most of the baselines. + [0.4.0] - 2020-06-xx -------------------- - [ADDED] convenience way to modify the architecture of the neural networks diff --git a/docs/DeepQSimple.rst b/docs/DeepQSimple.rst index 6e5376b..b572462 100644 --- a/docs/DeepQSimple.rst +++ b/docs/DeepQSimple.rst @@ -6,6 +6,7 @@ Description This file serves as an concrete example on how to implement a baseline, even more concretely than the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however. +An example to train this model is available in the train function :ref:`Example-deepqsimple`. Exported class -------------- diff --git a/docs/DuelQLeapNet.rst b/docs/DuelQLeapNet.rst index a5bdd6d..37dfdbd 100644 --- a/docs/DuelQLeapNet.rst +++ b/docs/DuelQLeapNet.rst @@ -13,6 +13,8 @@ powerlines based on the injection and the topology. In this baseline, we use this very same architecture to model the Q function. The D3QN RL method is used. +An example to train this model is available in the train function :ref:`Example-leapnet`. 
+ Exported class -------------- You can use this class with: diff --git a/docs/DuelQSimple.rst b/docs/DuelQSimple.rst index be0ea8b..947a727 100644 --- a/docs/DuelQSimple.rst +++ b/docs/DuelQSimple.rst @@ -8,6 +8,7 @@ Description This file serves as an concrete example on how to implement a baseline, even more concretely than the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however. +An example to train this model is available in the train function :ref:`Example-duelqsimple`. Exported class -------------- diff --git a/docs/SAC.rst b/docs/SAC.rst index 92f0690..772368e 100644 --- a/docs/SAC.rst +++ b/docs/SAC.rst @@ -9,6 +9,7 @@ Description ----------- This module proposes an implementation of the SAC algorithm. +An example to train this model is available in the train function :ref:`Example-sac`. Exported class -------------- diff --git a/docs/conf.py b/docs/conf.py index 8f6ccb7..2b56df9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Benjamin DONNOT' # The full version, including alpha/beta/rc tags -release = '0.4.0' +release = '0.4.1' version = '0.4' # -- General configuration --------------------------------------------------- diff --git a/l2rpn_baselines/DeepQSimple/train.py b/l2rpn_baselines/DeepQSimple/train.py index d327314..3089845 100755 --- a/l2rpn_baselines/DeepQSimple/train.py +++ b/l2rpn_baselines/DeepQSimple/train.py @@ -26,7 +26,6 @@ def train(env, save_path=None, load_path=None, logs_dir=None, - nb_env=1, training_param=None, filter_action_fun=None, kwargs_converters={}, @@ -56,10 +55,6 @@ def train(env, logs_dir: ``str`` Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. - nb_env: ``int`` - Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also, - if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess. 
- training_param: :class:`l2rpn_baselines.utils.TrainingParam` The parameters describing the way you will train your model. @@ -84,9 +79,13 @@ def train(env, baseline: :class:`DeepQSimple` The trained baseline. + + .. _Example-deepqsimple: + Examples --------- - Here is an example on how to train a DeepSimple baseline. + + Here is an example on how to train a DeepQSimple baseline. First define a python script, for example @@ -94,7 +93,7 @@ def train(env, import grid2op from grid2op.Reward import L2RPNReward - from l2rpn_baselines.utils import TrainingParam + from l2rpn_baselines.utils import TrainingParam, NNParam from l2rpn_baselines.DeepQSimple import train # define the environment @@ -111,7 +110,7 @@ def train(env, "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] # neural network architecture - observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X) + observation_size = NNParam.get_obs_size(env, li_attr_obs_X) sizes = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers kwargs_archi = {'observation_size': observation_size, 'sizes': sizes, @@ -134,7 +133,6 @@ def train(env, save_path="/WHERE/I/SAVED/THE/MODEL", load_path=None, logs_dir="/WHERE/I/SAVED/THE/LOGS", - nb_env=1, training_param=tp, kwargs_converters=kwargs_converters, kwargs_archi=kwargs_archi) @@ -177,7 +175,6 @@ def train(env, nn_archi=nn_archi, name=name, istraining=True, - nb_env=nb_env, verbose=verbose, **kwargs_converters ) diff --git a/l2rpn_baselines/DuelQLeapNet/train.py b/l2rpn_baselines/DuelQLeapNet/train.py index 9211271..f507937 100755 --- a/l2rpn_baselines/DuelQLeapNet/train.py +++ b/l2rpn_baselines/DuelQLeapNet/train.py @@ -26,7 +26,6 @@ def train(env, save_path=None, load_path=None, logs_dir=None, - nb_env=1, training_param=None, filter_action_fun=None, verbose=True, @@ -56,10 +55,6 @@ def train(env, logs_dir: ``str`` Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. 
- nb_env: ``int`` - Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also, - if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess. - training_param: :class:`l2rpn_baselines.utils.TrainingParam` The parameters describing the way you will train your model. @@ -84,9 +79,12 @@ def train(env, baseline: :class:`DuelQLeapNet` The trained baseline. + + .. _Example-leapnet: + Examples --------- - Here is an example on how to train a DeepSimple baseline. + Here is an example on how to train a DuelQLeapNet baseline. First define a python script, for example @@ -95,7 +93,7 @@ def train(env, import grid2op from grid2op.Reward import L2RPNReward from l2rpn_baselines.utils import TrainingParam - from l2rpn_baselines.DuelQLeapNet import train + from l2rpn_baselines.DuelQLeapNet import train, LeapNet_NNParam # define the environment env = grid2op.make("l2rpn_case14_sandbox", @@ -141,14 +139,15 @@ def train(env, } # define the name of the model nm_ = "AnneOnymous" + save_path = "/WHERE/I/SAVED/THE/MODEL" + logs_dir = "/WHERE/I/SAVED/THE/LOGS" try: train(env, name=nm_, iterations=10000, - save_path="/WHERE/I/SAVED/THE/MODEL", + save_path=save_path, load_path=None, - logs_dir="/WHERE/I/SAVED/THE/LOGS", - nb_env=1, + logs_dir=logs_dir, training_param=tp, kwargs_converters=kwargs_converters, kwargs_archi=kwargs_archi) @@ -191,8 +190,8 @@ def train(env, nn_archi=nn_archi, name=name, istraining=True, - nb_env=nb_env, filter_action_fun=filter_action_fun, + verbose=verbose, **kwargs_converters ) @@ -343,7 +342,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): # limit the number of time steps played per scenarios tp.step_increase_nb_iter = 100 # None to deactivate it tp.min_iter = 10 - tp.update_nb_iter(100) # once 100 scenarios are solved, increase of "step_increase_nb_iter" + tp.update_nb_iter = 100 # once 100 scenarios are solved, increase of "step_increase_nb_iter" # 
oversampling hard scenarios tp.oversampling_rate = 3 # None to deactivate it @@ -374,9 +373,10 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): # nn architecture li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"] - li_attr_obs_Tau = ["rho", "line_status"] - sizes = [800, 800, 800, 494, 494, 494] + "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho", "line_status"] + # li_attr_obs_Tau = ["rho", "line_status"] + li_attr_obs_Tau = [] + sizes = [512, 512, 256, 256] x_dim = LeapNet_NNParam.get_obs_size(env_init, li_attr_obs_X) tau_dims = [LeapNet_NNParam.get_obs_size(env_init, [el]) for el in li_attr_obs_Tau] @@ -402,7 +402,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): nb_env=args.nb_env, training_param=tp, kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) + kwargs_archi=kwargs_archi, + verbose=True) finally: env.close() if args.nb_env > 1: diff --git a/l2rpn_baselines/DuelQSimple/train.py b/l2rpn_baselines/DuelQSimple/train.py index 937f0ed..239f851 100755 --- a/l2rpn_baselines/DuelQSimple/train.py +++ b/l2rpn_baselines/DuelQSimple/train.py @@ -26,7 +26,6 @@ def train(env, save_path=None, load_path=None, logs_dir=None, - nb_env=1, training_param=None, filter_action_fun=None, verbose=True, @@ -57,10 +56,6 @@ def train(env, logs_dir: ``str`` Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. - nb_env: ``int`` - Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also, - if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess. 
- verbose: ``bool`` If you want something to be printed on the terminal (a better logging strategy will be put at some point) @@ -85,9 +80,13 @@ def train(env, baseline: :class:`DeepQSimple` The trained baseline. + + .. _Example-duelqsimple: + Examples --------- - Here is an example on how to train a DeepSimple baseline. + + Here is an example on how to train a DuelQSimple baseline. First define a python script, for example @@ -95,7 +94,7 @@ def train(env, import grid2op from grid2op.Reward import L2RPNReward - from l2rpn_baselines.utils import TrainingParam + from l2rpn_baselines.utils import TrainingParam, NNParam from l2rpn_baselines.DuelQSimple import train # define the environment @@ -112,7 +111,7 @@ def train(env, "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] # neural network architecture - observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X) + observation_size = NNParam.get_obs_size(env, li_attr_obs_X) sizes = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers kwargs_archi = {'observation_size': observation_size, 'sizes': sizes, @@ -135,7 +134,6 @@ def train(env, save_path="/WHERE/I/SAVED/THE/MODEL", load_path=None, logs_dir="/WHERE/I/SAVED/THE/LOGS", - nb_env=1, training_param=tp, kwargs_converters=kwargs_converters, kwargs_archi=kwargs_archi) @@ -178,7 +176,6 @@ def train(env, nn_archi=nn_archi, name=name, istraining=True, - nb_env=nb_env, verbose=verbose, **kwargs_converters ) diff --git a/l2rpn_baselines/SAC/SAC_NN.py b/l2rpn_baselines/SAC/SAC_NN.py index f57d9c5..e01a8cd 100644 --- a/l2rpn_baselines/SAC/SAC_NN.py +++ b/l2rpn_baselines/SAC/SAC_NN.py @@ -39,12 +39,14 @@ class SAC_NN(BaseDeepQ): """ def __init__(self, nn_params, - training_param=None): + training_param=None, + verbose=False): if training_param is None: training_param = TrainingParam() BaseDeepQ.__init__(self, nn_params, - training_param) + training_param, + verbose=verbose) # TODO add as meta param the number of "Q" you want to use (here 2) # 
TODO add as meta param size and types of the networks @@ -248,7 +250,8 @@ def load_network(self, path, name=None, ext="h5"): self.model_Q = load_model('{}.{}'.format(path_modelQ, ext)) self.model_Q2 = load_model('{}.{}'.format(path_modelQ2, ext)) self.model_policy = load_model('{}.{}'.format(path_policy, ext)) - print("Succesfully loaded network.") + if self.verbose: + print("Succesfully loaded network.") def target_train(self): """ diff --git a/l2rpn_baselines/SAC/train.py b/l2rpn_baselines/SAC/train.py index a801ef6..2262f5c 100755 --- a/l2rpn_baselines/SAC/train.py +++ b/l2rpn_baselines/SAC/train.py @@ -26,7 +26,6 @@ def train(env, save_path=None, load_path=None, logs_dir=None, - nb_env=1, training_param=None, filter_action_fun=None, verbose=True, @@ -56,10 +55,6 @@ def train(env, logs_dir: ``str`` Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. - nb_env: ``int`` - Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also, - if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess. - verbose: ``bool`` If you want something to be printed on the terminal (a better logging strategy will be put at some point) @@ -84,9 +79,12 @@ def train(env, baseline: :class:`DeepQSimple` The trained baseline. + + .. _Example-sac: + Examples --------- - Here is an example on how to train a DeepSimple baseline. + Here is an example on how to train a SAC baseline. 
First define a python script, for example @@ -94,7 +92,7 @@ def train(env, import grid2op from grid2op.Reward import L2RPNReward - from l2rpn_baselines.utils import TrainingParam + from l2rpn_baselines.utils import TrainingParam, NNParam from l2rpn_baselines.SAC import train # define the environment @@ -111,7 +109,7 @@ def train(env, "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] # neural network architecture - observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X) + observation_size = NNParam.get_obs_size(env, li_attr_obs_X) sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers sizes_v = [800, 800] # sizes of each hidden layers sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers @@ -141,7 +139,6 @@ def train(env, save_path="/WHERE/I/SAVED/THE/MODEL", load_path=None, logs_dir="/WHERE/I/SAVED/THE/LOGS", - nb_env=1, training_param=tp, kwargs_converters=kwargs_converters, kwargs_archi=kwargs_archi) @@ -184,7 +181,6 @@ def train(env, nn_archi=nn_archi, name=name, istraining=True, - nb_env=nb_env, verbose=verbose, **kwargs_converters ) diff --git a/l2rpn_baselines/test/test_import.py b/l2rpn_baselines/test/test_import.py index 3d5ee1d..b022062 100644 --- a/l2rpn_baselines/test/test_import.py +++ b/l2rpn_baselines/test/test_import.py @@ -70,14 +70,15 @@ def load_module(self): return "PandapowerOPFAgent" -class TestPandapowerGeirina(TestImport, unittest.TestCase): - def load_module(self): - return "Geirina" +# because it deactivates the eager mode +# class TestPandapowerGeirina(TestImport, unittest.TestCase): +# def load_module(self): +# return "Geirina" -class TestAsynchronousActorCritic(TestImport, unittest.TestCase): - def load_module(self): - return "AsynchronousActorCritic" +# class TestAsynchronousActorCritic(TestImport, unittest.TestCase): +# def load_module(self): +# return "AsynchronousActorCritic" if __name__ == "__main__": diff --git a/l2rpn_baselines/test/test_train_eval.py 
b/l2rpn_baselines/test/test_train_eval.py index eec3f6c..f867286 100644 --- a/l2rpn_baselines/test/test_train_eval.py +++ b/l2rpn_baselines/test/test_train_eval.py @@ -11,9 +11,14 @@ import unittest import warnings import tempfile +import logging + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +logging.getLogger('tensorflow').setLevel(logging.FATAL) import grid2op -from l2rpn_baselines.utils import TrainingParam, NNParam + +from l2rpn_baselines.utils import TrainingParam, NNParam, make_multi_env from l2rpn_baselines.DeepQSimple import train as train_dqn from l2rpn_baselines.DeepQSimple import evaluate as eval_dqn from l2rpn_baselines.DuelQSimple import train as train_d3qs @@ -32,8 +37,6 @@ from l2rpn_baselines.SliceRDQN import evaluate as eval_srqn from l2rpn_baselines.SliceRDQN import SliceRDQN_Config as srdqn_cfg -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - class TestDeepQSimple(unittest.TestCase): def test_train_eval(self): @@ -41,13 +44,12 @@ def test_train_eval(self): tp.buffer_size = 100 tp.minibatch_size = 8 tp.update_freq = 32 + tp.min_observation = 32 tmp_dir = tempfile.mkdtemp() with warnings.catch_warnings(): warnings.filterwarnings("ignore") env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] + li_attr_obs_X = ["prod_p", "load_p", "rho"] # neural network architecture observation_size = NNParam.get_obs_size(env, li_attr_obs_X) @@ -69,7 +71,6 @@ def test_train_eval(self): save_path=tmp_dir, load_path=None, logs_dir=tmp_dir, - nb_env=1, training_param=tp, verbose=False, kwargs_converters=kwargs_converters, @@ -85,6 +86,54 @@ def test_train_eval(self): verbose=False, save_gif=False) + def test_train_eval_multi(self): + tp = TrainingParam() + tp.buffer_size = 100 + tp.minibatch_size = 8 + tp.update_freq = 
32 + tp.min_observation = 32 + tmp_dir = tempfile.mkdtemp() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env_init = grid2op.make("rte_case5_example", test=True) + env = make_multi_env(env_init, 2) + + li_attr_obs_X = ["prod_p", "load_p", "rho"] + + # neural network architecture + observation_size = NNParam.get_obs_size(env, li_attr_obs_X) + sizes = [100, 50, 10] # sizes of each hidden layers + kwargs_archi = {'observation_size': observation_size, + 'sizes': sizes, + 'activs': ["relu" for _ in sizes], # all relu activation function + "list_attr_obs": li_attr_obs_X} + + kwargs_converters = {"all_actions": None, + "set_line_status": False, + "change_bus_vect": True, + "set_topo_vect": False + } + nm_ = "AnneOnymous" + train_dqn(env, + name=nm_, + iterations=100, + save_path=tmp_dir, + load_path=None, + logs_dir=tmp_dir, + training_param=tp, + verbose=False, + kwargs_converters=kwargs_converters, + kwargs_archi=kwargs_archi) + + baseline_2 = eval_dqn(env_init, + name=nm_, + load_path=tmp_dir, + logs_path=tmp_dir, + nb_episode=1, + nb_process=1, + max_steps=30, + verbose=False, + save_gif=False) class TestDuelQSimple(unittest.TestCase): def test_train_eval(self): @@ -92,13 +141,12 @@ def test_train_eval(self): tp.buffer_size = 100 tp.minibatch_size = 8 tp.update_freq = 32 + tp.min_observation = 32 tmp_dir = tempfile.mkdtemp() with warnings.catch_warnings(): warnings.filterwarnings("ignore") env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] + li_attr_obs_X = ["prod_p", "load_p", "rho"] # neural network architecture observation_size = NNParam.get_obs_size(env, li_attr_obs_X) @@ -120,7 +168,6 @@ def test_train_eval(self): save_path=tmp_dir, load_path=None, logs_dir=tmp_dir, - nb_env=1, 
training_param=tp, verbose=False, kwargs_converters=kwargs_converters, @@ -143,13 +190,12 @@ def test_train_eval(self): tp.buffer_size = 100 tp.minibatch_size = 8 tp.update_freq = 32 + tp.min_observation = 32 tmp_dir = tempfile.mkdtemp() with warnings.catch_warnings(): warnings.filterwarnings("ignore") env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] + li_attr_obs_X = ["prod_p", "load_p", "rho"] # neural network architecture observation_size = NNParam.get_obs_size(env, li_attr_obs_X) @@ -178,7 +224,6 @@ def test_train_eval(self): save_path=tmp_dir, load_path=None, logs_dir=tmp_dir, - nb_env=1, training_param=tp, verbose=False, kwargs_converters=kwargs_converters, @@ -201,20 +246,15 @@ def test_train_eval(self): tp.buffer_size = 100 tp.minibatch_size = 8 tp.update_freq = 32 + tp.min_observation = 32 tmp_dir = tempfile.mkdtemp() with warnings.catch_warnings(): warnings.filterwarnings("ignore") env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] - # neural network architecture - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"] - li_attr_obs_Tau = ["rho", "line_status"] - sizes = [800, 800, 800, 494, 494, 494] + li_attr_obs_X = ["prod_p", "load_p", "rho"] + li_attr_obs_Tau = ["line_status"] + sizes = [100, 50, 10] x_dim = NNParam.get_obs_size(env, 
li_attr_obs_X) tau_dims = [NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau] @@ -241,7 +281,6 @@ def test_train_eval(self): save_path=tmp_dir, load_path=None, logs_dir=tmp_dir, - nb_env=1, training_param=tp, verbose=False, kwargs_converters=kwargs_converters, @@ -257,6 +296,7 @@ def test_train_eval(self): verbose=False, save_gif=False) + class TestD3QN(unittest.TestCase): def test_train_eval(self): tmp_dir = tempfile.mkdtemp() @@ -294,6 +334,7 @@ def test_train_eval(self): assert eval_res is not None + class TestRDQN(unittest.TestCase): def test_train_eval(self): tmp_dir = tempfile.mkdtemp() @@ -329,6 +370,7 @@ def test_train_eval(self): assert eval_res is not None + class TestSRDQN(unittest.TestCase): def test_train_eval(self): tmp_dir = tempfile.mkdtemp() diff --git a/l2rpn_baselines/utils/BaseDeepQ.py b/l2rpn_baselines/utils/BaseDeepQ.py index d21c811..2e1d8d8 100644 --- a/l2rpn_baselines/utils/BaseDeepQ.py +++ b/l2rpn_baselines/utils/BaseDeepQ.py @@ -71,10 +71,12 @@ class BaseDeepQ(ABC): def __init__(self, nn_params, - training_param=None): + training_param=None, + verbose=False): self._action_size = nn_params.action_size self._observation_size = nn_params.observation_size self._nn_archi = nn_params + self.verbose = verbose if training_param is None: self._training_param = TrainingParam() @@ -207,7 +209,8 @@ def load_network(self, path, name=None, ext="h5"): with warnings.catch_warnings(): warnings.filterwarnings("ignore") self._target_model = load_model('{}.{}'.format(path_target_model, ext), custom_objects=self._custom_objects) - print("Succesfully loaded network.") + if self.verbose: + print("Succesfully loaded network.") def target_train(self): """ diff --git a/l2rpn_baselines/utils/DeepQAgent.py b/l2rpn_baselines/utils/DeepQAgent.py index 6bd8c69..f8334e0 100644 --- a/l2rpn_baselines/utils/DeepQAgent.py +++ b/l2rpn_baselines/utils/DeepQAgent.py @@ -11,6 +11,7 @@ from tqdm import tqdm import tensorflow as tf +import grid2op from grid2op.Exceptions 
import Grid2OpException from grid2op.Agent import AgentWithConverter from grid2op.Converter import IdToAct @@ -111,7 +112,6 @@ def __init__(self, name="DeepQAgent", store_action=True, istraining=False, - nb_env=1, filter_action_fun=None, verbose=False, **kwargs_converters): @@ -122,7 +122,7 @@ def __init__(self, # and now back to the origin implementation self.replay_buffer = None - self.__nb_env = nb_env + self.__nb_env = None self.deep_q = None self._training_param = None @@ -306,8 +306,6 @@ def load(self, path): conv_path = os.path.join(tmp_me, "{}.npy".format(nm_attr)) if os.path.exists(conv_path): setattr(self, nm_attr, np.load(file=conv_path)) - else: - raise RuntimeError("Impossible to find the data \"{}.npy\" at \"{}\"".format(nm_attr, tmp_me)) def save(self, path): """ @@ -336,7 +334,9 @@ def save(self, path): # TODO save the "oversampling" part, and all the other info for nm_attr in ["_time_step_lived", "_nb_chosen", "_proba"]: conv_path = os.path.join(tmp_me, "{}.npy".format(nm_attr)) - np.save(arr=getattr(self, nm_attr), file=conv_path) + attr_ = getattr(self, nm_attr) + if attr_ is not None: + np.save(arr=attr_, file=conv_path) def train(self, env, @@ -353,7 +353,7 @@ def train(self, Parameters ---------- - env: :class:`grid2op.Environment.Environment` + env: :class:`grid2op.Environment.Environment` or :class:`grid2op.Environment.MultiEnvironment` The environment used to train your model. 
iterations: ``int`` @@ -404,6 +404,14 @@ def train(self, UPDATE_FREQ = training_param.update_tensorboard_freq # update tensorboard every "UPDATE_FREQ" steps SAVING_NUM = training_param.save_model_each + if isinstance(env, grid2op.Environment.Environment): + self.__nb_env = 1 + else: + import warnings + nb_env = env.nb_env + warnings.warn("Training using {} environments".format(nb_env)) + self.__nb_env = nb_env + self.init_obs_extraction(env) training_step = self._training_param.last_step @@ -435,22 +443,24 @@ def train(self, # for non uniform random sampling of the scenarios th_size = None - if _CACHE_AVAILABLE_DEEPQAGENT: - if isinstance(env.chronics_handler.real_data, MultifolderWithCache): - th_size = env.chronics_handler.real_data.cache_size - if th_size is None: - th_size = len(env.chronics_handler.real_data.subpaths) - self._prev_obs_num = 0 - # number of time step lived per possible scenarios - if self._time_step_lived is None or self._time_step_lived.shape[0] != th_size: - self._time_step_lived = np.zeros(th_size, dtype=np.uint64) - # number of time a given scenario has been played - if self._nb_chosen is None or self._nb_chosen.shape[0] != th_size: - self._nb_chosen = np.zeros(th_size, dtype=np.uint) - # number of time a given scenario has been played - if self._proba is None or self._proba.shape[0] != th_size: - self._proba = np.ones(th_size, dtype=np.float64) + if self.__nb_env == 1: + # TODO make this available for multi env too + if _CACHE_AVAILABLE_DEEPQAGENT: + if isinstance(env.chronics_handler.real_data, MultifolderWithCache): + th_size = env.chronics_handler.real_data.cache_size + if th_size is None: + th_size = len(env.chronics_handler.real_data.subpaths) + + # number of time step lived per possible scenarios + if self._time_step_lived is None or self._time_step_lived.shape[0] != th_size: + self._time_step_lived = np.zeros(th_size, dtype=np.uint64) + # number of time a given scenario has been played + if self._nb_chosen is None or 
self._nb_chosen.shape[0] != th_size: + self._nb_chosen = np.zeros(th_size, dtype=np.uint) + # number of time a given scenario has been played + if self._proba is None or self._proba.shape[0] != th_size: + self._proba = np.ones(th_size, dtype=np.float64) self._prev_id = 0 # this is for the "limit the episode length" depending on your previous success @@ -485,6 +495,7 @@ def train(self, temp_reward = np.array([temp_reward], dtype=np.float32) temp_done = np.array([temp_done], dtype=np.bool) info = [info] + new_state = self._convert_obs_train(temp_observation_obj) self._updage_illegal_ambiguous(training_step, info) done, reward, total_reward, alive_frame, epoch_num \ @@ -588,7 +599,7 @@ def _train_model(self, training_step): loss = self.deep_q.train(s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer) # save learning rate for later - self._train_lr = self.deep_q.optimizer_model._decayed_lr('float32').numpy() + self._train_lr = self.deep_q._optimizer_model._decayed_lr('float32').numpy() self.__graph_saved = True if not np.all(np.isfinite(loss)): # if the loss is not finite i stop the learning @@ -673,7 +684,8 @@ def _need_reset(self, env, observation_num, epoch_num, done, new_state): # update the number of time steps it has live ts_lived = observation_num - self._prev_obs_num - self._time_step_lived[self._prev_id] += ts_lived + if self._time_step_lived is not None: + self._time_step_lived[self._prev_id] += ts_lived self._prev_obs_num = observation_num if self._training_param.oversampling_rate is not None: # proba = np.sqrt(1. 
/ (self._time_step_lived +1)) @@ -694,7 +706,8 @@ def _need_reset(self, env, observation_num, epoch_num, done, new_state): self._prev_id %= self._time_step_lived.shape[0] env.reset() - self._nb_chosen[self._prev_id] += 1 + if self._nb_chosen is not None: + self._nb_chosen[self._prev_id] += 1 # random fast forward between now and next week if self._training_param.random_sample_datetime_start is not None: @@ -783,17 +796,17 @@ def _save_tensorboard(self, step, epoch_num, UPDATE_FREQ, epoch_rewards, epoch_a # print the top k scenarios the "hardest" (ie chosen the most number of times if self.verbose: top_k = 10 - array_ = np.argsort(self._nb_chosen)[-top_k:][::-1] - print("hardest scenarios\n{}".format(array_)) - print("They have been chosen respectively\n{}".format(self._nb_chosen[array_])) - # print("Associated proba are\n{}".format(self._proba[array_])) - print("The number of timesteps played is\n{}".format(self._time_step_lived[array_])) - print("avg (accross all scenarios) number of timsteps played {}" - "".format(np.mean(self._time_step_lived))) - print("Time alive: {}".format(self._time_step_lived[array_] / (self._nb_chosen[array_] + 1))) - print("Avg time alive: {}".format(np.mean(self._time_step_lived / (self._nb_chosen + 1 )))) - # print("avg (accross all scenarios) proba {}" - # "".format(np.mean(self._proba))) + if self._nb_chosen is not None: + array_ = np.argsort(self._nb_chosen)[-top_k:][::-1] + print("hardest scenarios\n{}".format(array_)) + print("They have been chosen respectively\n{}".format(self._nb_chosen[array_])) + # print("Associated proba are\n{}".format(self._proba[array_])) + print("The number of timesteps played is\n{}".format(self._time_step_lived[array_])) + print("avg (accross all scenarios) number of timsteps played {}" + "".format(np.mean(self._time_step_lived))) + print("Time alive: {}".format(self._time_step_lived[array_] / (self._nb_chosen[array_] + 1))) + print("Avg time alive: {}".format(np.mean(self._time_step_lived / 
(self._nb_chosen + 1 )))) + with self._tf_writer.as_default(): last_alive = epoch_alive[(epoch_num-1)] last_reward = epoch_rewards[(epoch_num-1)] @@ -885,12 +898,13 @@ def _save_tensorboard(self, step, epoch_num, UPDATE_FREQ, epoch_rewards, epoch_a self.nb_do_nothing = 0 self._nb_updated_act_tensorboard = 0 - - tf.summary.histogram( - "timestep_lived", self._time_step_lived, step=step_tb, buckets=None, - description="number of time steps lived for all scenarios" - ) - tf.summary.histogram( - "nb_chosen", self._nb_chosen, step=step_tb, buckets=None, - description="number of times this scenarios has been played" - ) + if self._time_step_lived is not None: + tf.summary.histogram( + "timestep_lived", self._time_step_lived, step=step_tb, buckets=None, + description="number of time steps lived for all scenarios" + ) + if self._nb_chosen is not None: + tf.summary.histogram( + "nb_chosen", self._nb_chosen, step=step_tb, buckets=None, + description="number of times this scenarios has been played" + ) diff --git a/l2rpn_baselines/utils/TrainingParam.py b/l2rpn_baselines/utils/TrainingParam.py index 3621640..df46c5e 100644 --- a/l2rpn_baselines/utils/TrainingParam.py +++ b/l2rpn_baselines/utils/TrainingParam.py @@ -151,7 +151,7 @@ def __init__(self, self.buffer_size = buffer_size self.minibatch_size = minibatch_size - self.min_observation = min_observation # 5000 + self.min_observation = min_observation self._final_epsilon = float(final_epsilon) # have on average 1 random action per day of approx 288 timesteps at the end (never kill completely the exploration) self._initial_epsilon = float(initial_epsilon) self.step_for_final_epsilon = float(step_for_final_epsilon) @@ -184,38 +184,33 @@ def __init__(self, self.update_tensorboard_freq = update_tensorboard_freq self.save_model_each = save_model_each + self.max_iter_fun = self.default_max_iter_fun self._compute_exp_facto() @property def final_epsilon(self): - """return the final epsilon allowed by this instance""" return 
self._final_epsilon @final_epsilon.setter def final_epsilon(self, final_epsilon): - """used to update the final_epsilon""" self._final_epsilon = final_epsilon self._compute_exp_facto() @property def initial_epsilon(self): - """get the intial epsilon used for epsilon greedy""" return self._initial_epsilon @initial_epsilon.setter def initial_epsilon(self, initial_epsilon): - """used to update the initial_epsilon attribute""" self._initial_epsilon = initial_epsilon self._compute_exp_facto() @property def update_nb_iter(self): - """update the total number of iteration you want to make""" return self._update_nb_iter @update_nb_iter.setter def update_nb_iter(self, update_nb_iter): - """update the total number of iteration you want to make""" self._update_nb_iter = update_nb_iter if self._update_nb_iter is not None and self._update_nb_iter > 0: self._1_update_nb_iter = 1.0 / self._update_nb_iter diff --git a/l2rpn_baselines/utils/__init__.py b/l2rpn_baselines/utils/__init__.py index c047252..1a86597 100644 --- a/l2rpn_baselines/utils/__init__.py +++ b/l2rpn_baselines/utils/__init__.py @@ -5,12 +5,14 @@ # you can obtain one at http://mozilla.org/MPL/2.0/. # SPDX-License-Identifier: MPL-2.0 # This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. 
+__version__ = "0.4.1" __all__ = [ "cli_eval", "cli_train", "str2bool", "save_log_gif", + "make_multi_env", "zip_for_codalab", "train_generic", "TrainingParam", @@ -26,9 +28,10 @@ from l2rpn_baselines.utils.save_log_gif import save_log_gif from l2rpn_baselines.utils.zip_for_codalab import zip_for_codalab from l2rpn_baselines.utils.train_generic import train_generic +from l2rpn_baselines.utils.make_multi_env import make_multi_env from l2rpn_baselines.utils.TrainingParam import TrainingParam from l2rpn_baselines.utils.NNParam import NNParam from l2rpn_baselines.utils.ReplayBuffer import ReplayBuffer from l2rpn_baselines.utils.BaseDeepQ import BaseDeepQ -from l2rpn_baselines.utils.DeepQAgent import DeepQAgent \ No newline at end of file +from l2rpn_baselines.utils.DeepQAgent import DeepQAgent diff --git a/l2rpn_baselines/utils/make_multi_env.py b/l2rpn_baselines/utils/make_multi_env.py new file mode 100644 index 0000000..6222f28 --- /dev/null +++ b/l2rpn_baselines/utils/make_multi_env.py @@ -0,0 +1,57 @@ +# Copyright (c) 2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. + +import warnings +from grid2op.Environment import Environment +try: + from grid2op.Environment import MultiEnvironment +except ImportError: + # name will change as of grid2op >= 1.0.0 + try: + from grid2op.Environment import MultiEnvMultiProcess as MultiEnvironment + except ImportError as exc: + raise exc + + +def make_multi_env(env_init, nb_env): + """ + This function creates a multi environment compatible with what is expected in the baselines.
In particular, it + adds the observation_space, the action_space and the reward_range attributes. + + The way this function works is explained in the getting_started of grid2op. + + Attributes + ----------- + env_init: :class:`grid2op.Environment.Environment` + The environment to duplicate + nb_env: ``int`` + The number of environments with which you want to interact at the same time + + Returns + ------- + res: :class:`grid2op.Environment.MultiEnvironment` or :class:`grid2op.Environment.Environment` + A copy of the initial environment (if nb_env = 1) or a MultiEnvironment based on the initial environment + if nb_env >= 2. + + """ + res = None + nb_env = int(nb_env) + + if nb_env <= 0: + raise RuntimeError("Impossible to create a negative number of environments") + + if nb_env == 1: + warnings.warn("You asked to create 1 environment. We didn't use the MultiEnvironment for that. We instead " + "created a copy of your initial environment.") + res = Environment(**env_init.get_kwargs()) + else: + res = MultiEnvironment(nb_env, env_init) + res.observation_space = env_init.observation_space + res.action_space = env_init.action_space + res.reward_range = env_init.reward_range + return res \ No newline at end of file diff --git a/setup.py b/setup.py index ea41ad8..7bd14fb 100644 --- a/setup.py +++ b/setup.py @@ -8,12 +8,19 @@ import setuptools from setuptools import setup -__version__ = "0.4.0" +__version__ = "0.4.1" pkgs = { "required": [ - "grid2op[challenge,optional]>=0.9.1.post1" + "grid2op[optional]>=0.9.1.post1", + "tensorflow>=2.2.0", + "Keras>=2.3.1", + "torch>=1.4.0", + "statsmodels>=0.11.1", + "scikit-learn>=0.22.2.post1", + "gym>=0.17.1", + "scipy>=1.4.1", ], "extras": { "docs": [ @@ -22,7 +29,8 @@ "sphinx-rtd-theme>=0.4.3", "sphinxcontrib-trio>=1.1.0", "autodocsumm>=0.1.13" - ] + ], + "challenge": ["grid2op[challenge]>=0.9.1.post1"] } }