Skip to content

Commit

Permalink
small modifications; better docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
FilippoAiraldi committed Apr 26, 2024
1 parent 19f760d commit fe94d86
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 8 deletions.
2 changes: 2 additions & 0 deletions examples/q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy.typing as npt
from csnlp import Nlp
from csnlp.wrappers import Mpc
from gymnasium.spaces import Box
from gymnasium.wrappers import TimeLimit

from mpcrl import LearnableParameter, LearnableParametersDict, LstdQLearningAgent
Expand All @@ -37,6 +38,7 @@ class LtiSystem(gym.Env[npt.NDArray[np.floating], float]):
a_bnd = (-1, 1) # bounds of control input
w = np.asarray([[1e2], [1e2]]) # penalty weight for bound violations
e_bnd = (-1e-1, 0) # uniform noise bounds
action_space = Box(*a_bnd, (nu,), np.float64)

def reset(
self,
Expand Down
6 changes: 3 additions & 3 deletions src/mpcrl/agents/common/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def __init__(
values. Use this to specify fixed parameters, that is, non-learnable. If
`None`, then no fixed parameter is assumed.
exploration : ExplorationStrategy, optional
Exploration strategy for inducing exploration in the MPC policy. By default
`None`, in which case `NoExploration` is used.
Exploration strategy for inducing exploration in the online MPC policy. By
default `None`, in which case `NoExploration` is used.
warmstart: "last" or "last-successful" or WarmStartStrategy, optional
The warmstart strategy for the MPC's NLP. If `last-successful`, the last
successful solution is used to warm start the solver for the next iteration.
Expand Down Expand Up @@ -332,7 +332,7 @@ def state_value(
"""
V = self._V
exploration = self._exploration
exploration_mode = self._exploration.mode
exploration_mode = exploration.mode
na = V.na
if deterministic or exploration_mode is None or not exploration.can_explore():
pert = None
Expand Down
4 changes: 2 additions & 2 deletions src/mpcrl/agents/lstd_dpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def __init__(
their bounds and values. This dict is complementary with `fixed_parameters`,
which contains the MPC parameters that are not learnt by the agent.
exploration : ExplorationStrategy, optional
Exploration strategy for inducing exploration in the MPC policy (it is
mandatory to explore in DPG).
Exploration strategy for inducing exploration in the online MPC policy (it
is mandatory to explore in DPG).
fixed_parameters : dict[str, array_like] or collection of, optional
A dict (or collection of dict, in case of `csnlp.MultistartNlp`) whose keys
are the names of the MPC parameters and the values are their corresponding
Expand Down
8 changes: 5 additions & 3 deletions src/mpcrl/agents/lstd_q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ def __init__(
values. Use this to specify fixed parameters, that is, non-learnable. If
`None`, then no fixed parameter is assumed.
exploration : ExplorationStrategy, optional
Exploration strategy for inducing exploration in the MPC policy. By default
`None`, in which case `NoExploration` is used in the fixed-MPC agent.
Exploration strategy for inducing exploration in the online MPC policy. By
default `None`, in which case `NoExploration` is used. Should not be set
when learning off-policy, as exploration should be taken care of in the
off-policy data generation.
experience : int or ExperienceReplay, optional
The container for experience replay memory. If `None` is passed, then a
memory with length 1 is created, i.e., it keeps only the latest memory
Expand Down Expand Up @@ -252,7 +254,7 @@ def _init_sensitivity(
x_lam_p = cs.vertcat(nlp.primal_dual, nlp.p)

# compute first order sensitivity
snlp = NlpSensitivity(self._Q.nlp, theta)
snlp = NlpSensitivity(nlp, theta)
gradient = snlp.jacobians["L-p"] # exact gradient, i.e., dQ/dtheta

if hessian_type == "none":
Expand Down
3 changes: 3 additions & 0 deletions src/mpcrl/core/exploration.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ def __repr__(self) -> str:
class NoExploration(ExplorationStrategy):
"""Strategy where no exploration is allowed at any time or, in other words, the
policy is always deterministic (only based on the current state, and not perturbed).
This is a special kind of `ExplorationStrategy`, the only one without any
`hook` or `mode`.
"""

def __init__(self) -> None:
Expand Down

0 comments on commit fe94d86

Please sign in to comment.