From 57cb327d3d34aacb83e51da2e4955d447f726eb6 Mon Sep 17 00:00:00 2001
From: Hamda <93351861+HamdaHmida@users.noreply.github.com>
Date: Fri, 18 Oct 2024 09:18:04 +0100
Subject: [PATCH] Up variable encoding (#414)

- Add a variable space that allows for variable-length state arrays in
  reinforcement learning
- Update the Unified Planning domain to allow PDDL states to be represented
  as variable-length arrays when used in reinforcement learning
---
 examples/up_native_solvers.py |  25 ++++++
 pyproject.toml                |   2 +-
 skdecide/hub/domain/up/up.py  | 161 +++++++++++++++++++++++++++++++---
 skdecide/hub/space/gym/gym.py |  46 ++++++++++
 4 files changed, 222 insertions(+), 12 deletions(-)

diff --git a/examples/up_native_solvers.py b/examples/up_native_solvers.py
index 08d3c20e3a..8e32cf9727 100644
--- a/examples/up_native_solvers.py
+++ b/examples/up_native_solvers.py
@@ -150,3 +150,28 @@
     max_framerate=30,
     outcome_formatter=None,
 )
+
+# Example 3: Solving the same numeric example with the variable state encoding of UPDomain
+
+domain_factory = lambda: UPDomain(
+    problem,
+    state_encoding="variable",
+    action_encoding="int",
+)
+
+print("Initialise Solver ... \n")
+solver = RayRLlib(
+    domain_factory=domain_factory,
+    algo_class=DQN,
+    train_iterations=1,
+)
+
+solver.solve()
+
+rollout(
+    domain_factory(),
+    solver,
+    num_episodes=1,
+    max_steps=100,
+    outcome_formatter=None,
+)
diff --git a/pyproject.toml b/pyproject.toml
index 3482231a5f..9b8c0b6dca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@
 up-enhsp = { version = ">=0.0.25", python = ">=3.10", optional = true }
 up-pyperplan = { version = ">=1.1.0", python = ">=3.10", optional = true }
 cartopy = { version = ">=0.22.0", python = ">=3.9", optional = true }
 pygrib = [
-    { version = ">=2.1.5", platform = "linux", optional = true },
+    { version = "<=2.1.5", platform = "linux", optional = true },
     { version = ">=2.1.5", platform = "darwin", optional = true },
 ]
diff --git a/skdecide/hub/domain/up/up.py b/skdecide/hub/domain/up/up.py
index e8facb5a00..398ad6f19a 100644
--- a/skdecide/hub/domain/up/up.py
+++ b/skdecide/hub/domain/up/up.py
@@ -25,7 +25,7 @@
 from skdecide.core import EmptySpace, ImplicitSpace, Space, Value
 from skdecide.domains import DeterministicPlanningDomain
 from skdecide.hub.space.gym import ListSpace, SetSpace
-from skdecide.hub.space.gym.gym import BoxSpace, DictSpace, DiscreteSpace, GymSpace
+from skdecide.hub.space.gym.gym import BoxSpace, DictSpace, DiscreteSpace, VariableSpace
 from skdecide.utils import logger

@@ -167,6 +167,8 @@ def __init__(
         fluent_domains: dict[FNode, tuple[Union[int, float], Union[int, float]]] = None,
         state_encoding: str = "native",
         action_encoding: str = "native",
+        max_len: int = 2000,
+        max_actions: int = 20,
         **simulator_params,
     ):
         """Initialize UPDomain.

         # Parameters
         problem: The Unified Planning problem (Problem) to wrap.
         fluent_domains: Dictionary of min and max fluent values by fluent represented as a Unified Planning's FNode (must be provided only if get_observation_space() is used)
-        state_encoding: Encoding of the state (observation) which must be one of "native", "dictionary" or "vector" (warning: if action_masking is "vector" then the state automatically becomes a dictionary which separates the action masking vector from the real state as defined here)
+        state_encoding: Encoding of the state (observation) which must be one of "native", "dictionary", "vector" or "variable" (warning: if action_masking is "vector" then the state automatically becomes a dictionary which separates the action masking vector from the real state as defined here)
         action_encoding: Encoding of the action which must be either "native" or "int"
+        max_len: Maximum number of fluents in the case of using variable state encoding
+        max_actions: Maximum number of actions in the case of using variable state encoding
         simulator_params: Optional parameters to pass to the UP sequential simulator
         """
         self._problem = problem
@@ -204,17 +208,21 @@
         self._states_np2up = None
         self._actions_up2np = None
         self._actions_np2up = None
-        if self._state_encoding != "native":
-            if self._state_encoding not in ["dictionary", "vector"]:
-                raise RuntimeError(
-                    "State encoding must be one of 'native', 'dictionary' or 'vector'"
-                )
-            self._init_state_encoding_()
+        self.max_len = max_len  # used only in the variable state encoding
+        self.max_actions = max_actions  # used only in the variable state encoding
+
         if self._action_encoding != "native":
             if self._action_encoding != "int":
                 raise RuntimeError("Action encoding must be either 'native' or 'int'")
             self._init_action_encoding_()
+        if self._state_encoding != "native":
+            if self._state_encoding not in ["dictionary", "vector", "variable"]:
+                raise RuntimeError(
+                    "State encoding must be one of 'native', 'dictionary', 'vector' or 'variable'"
+                )
+            self._init_state_encoding_()
+
     def _init_state_encoding_(self):
         def fnode_lower_bound(fn):
             if fn.fluent().type.lower_bound is not None:
@@ -240,12 +248,56 @@ def fnode_upper_bound(fn):
         self._fnodes_vars_ordering = []
         self._states_up2np = {}
         self._states_np2up = {}
+
+        if self._state_encoding == "variable":
+            self.objects = []
+            self.max_param = 0
+            for i, a in enumerate(self._actions_np2up):
+                if len(a.up_parameters) > self.max_param:
+                    self.max_param = len(a.up_parameters)
+                for p in a.up_parameters:
+                    if p not in self.objects:
+                        self.objects.append(p)
+
+            self.n2id = {
+                i.name: self._problem.fluents.index(i) + 1
+                for i in self._problem.fluents
+            }
+            self.id2n = {
+                self._problem.fluents.index(i) + 1: i.name
+                for i in self._problem.fluents
+            }
+
+            self.variable_mapping = {}
+            self.inv_mapping = {}
+            self.bools = []
+            self.bool_val = {}
+            self.non_bool_val = {}
+
         init_state = self._simulator.get_initial_state()
         static_fluents = self._problem.get_static_fluents()
         self._static_fluent_values = {}
         ci = init_state
         while ci is not None:
             for fn, fv in ci._values.items():
+                if self._state_encoding == "variable":
+                    if fn.fluent().type.is_bool_type():
+                        if self.n2id[fn.fluent().name] not in self.bools:
+                            self.bools.append(self.n2id[fn.fluent().name])
+                        if int(fv.constant_value()) not in self.bool_val.keys():
+                            self.bool_val[int(fv.constant_value())] = fv
+                        else:
+                            self.bool_val[int(fv.constant_value())] = fv
+                    fluent = np.array([-1 for _ in range(self.max_param + 2)])
+                    fluent[0] = self.n2id[fn.fluent().name]
+                    fluent[-1] = int(fv.constant_value())
+                    c = 1
+                    for j in fn._content.args:
+                        fluent[c] = self.objects.index(j)
+                        c += 1
+
+                    self.variable_mapping[(fn, fv)] = (fluent[:-1], fluent[-1])
+                    self.inv_mapping[tuple(fluent[:-1])] = fn
                 if (
                     fn.fluent() not in static_fluents
                     and fn.fluent().name != "total-cost"
@@ -295,11 +347,14 @@ def fnode_upper_bound(fn):
                 elif fn.fluent().type.is_time_type():
                     raise RuntimeError("Time types not handled by UPDomain")
                 elif fn.fluent().name != "total-cost":
-                    self._static_fluent_values[fn] = fv
+                    if self._state_encoding != "variable":
+                        self._static_fluent_values[fn] = fv
             ci = ci._father

     def _convert_to_skup_state_(self, state):
-        if self._state_encoding == "native":
+        if state is None:
+            return None
+        elif self._state_encoding == "native":
             return state
         elif self._state_encoding == "dictionary":
             kstate = frozenset(state.items())
@@ -327,6 +382,25 @@
             self._states_up2np[skup_state] = state
             self._states_np2up[kstate] = skup_state
             return skup_state
+        elif self._state_encoding == "variable":
+            values = {}
+            for fluent in state:
+                if tuple(fluent[:-1]) in self.inv_mapping.keys():
+                    k = self.inv_mapping[tuple(fluent[:-1])]
+                    if fluent[0] in self.bools:
+                        values[k] = self.bool_val[fluent[-1]]
+                    else:
+                        values[k] = Int(int(fluent[-1]))
+                else:
+                    for k in self.variable_mapping.keys():
+                        if np.array_equal(self.variable_mapping[k][0], fluent[:-1]):
+                            if fluent[0] in self.bools:
+                                values[k[0]] = self.bool_val[fluent[-1]]
+                            else:
+                                values[k[0]] = Int(int(fluent[-1]))
+
+            values.update(self._static_fluent_values)
+            return SkUPState(UPState(values))
         else:
             return None
@@ -367,6 +441,33 @@ def _convert_from_skup_state_(self, skup_state: SkUPState):
             self._states_np2up[tuple(state.flatten())] = skup_state
             self._states_up2np[skup_state] = state
             return state
+        elif self._state_encoding == "variable":
+            state = []
+            try:
+                ci = skup_state.up_state
+            except AttributeError:
+                ci = skup_state
+            while ci is not None:
+                for fn, val in ci._values.items():
+                    if (fn, val) in self.variable_mapping.keys():
+                        state.append(
+                            np.append(
+                                self.variable_mapping[(fn, val)][0],
+                                self.variable_mapping[(fn, val)][1],
+                            )
+                        )
+                    else:
+                        fluent = np.array([-1 for _ in range(self.max_param + 2)])
+                        fluent[0] = self.n2id[fn.fluent().name]
+                        fluent[-1] = int(val.constant_value())
+                        c = 1
+                        for j in fn._content.args:
+                            fluent[c] = self.objects.index(j)
+                            c += 1
+                        state.append(fluent)
+                        self.variable_mapping[(fn, val)] = (fluent[:-1], fluent[-1])
+                        self.inv_mapping[tuple(fluent[:-1])] = fn
+                return state
         else:
             return None
@@ -407,6 +508,11 @@ def _get_next_state(
         next_state = SkUPState(
             self._simulator.apply(state.up_state, act.up_action, act.up_parameters)
         )
+        if (self._state_encoding == "variable") and (next_state.up_state is not None):
+            for fn, fv in state.up_state._values.items():
+                if fn not in next_state.up_state._values.keys():
+                    next_state.up_state._values[fn] = fv
+
         if self._total_cost is not None:
             cost = (
                 next_state.up_state.get_value(self._total_cost).constant_value() - cost
@@ -478,7 +584,10 @@ def _get_action_space_(self) -> D.T_agent[Space[D.T_event]]:
             self._init_action_encoding_()
             self._action_space = ListSpace(self._actions_np2up)
         elif self._action_encoding == "int":
-            self._action_space = DiscreteSpace(len(self._actions_np2up))
+            if self._state_encoding != "variable":
+                self._action_space = DiscreteSpace(len(self._actions_np2up))
+            else:
+                self._action_space = DiscreteSpace(self.max_actions)
         else:
             return None
         return self._action_space
@@ -573,6 +682,36 @@ def _get_observation_space_(self) -> D.T_agent[Space[D.T_observation]]:
                     else np.int32
                 ),
             )
+        elif self._state_encoding == "variable":
+            self._observation_space = VariableSpace(
+                BoxSpace(
+                    low=-1
+                    if np.array(
+                        [
+                            self._fnodes_variables_map[fn][0]
+                            for fn in self._fnodes_vars_ordering
+                        ]
+                    ).min()
+                    > -1
+                    else np.array(
+                        [
+                            self._fnodes_variables_map[fn][0]
+                            for fn in self._fnodes_vars_ordering
+                        ]
+                    ).min(),
+                    high=1000000,
+                    shape=(self.max_param + 2,),
+                    dtype=(
+                        np.float32
+                        if any(
+                            fn.fluent().type.is_real_type()
+                            for fn in self._fnodes_vars_ordering
+                        )
+                        else np.int32
+                    ),
+                ),
+                max_len=self.max_len,
+            )
         else:
             return None
         return self._observation_space
diff --git a/skdecide/hub/space/gym/gym.py b/skdecide/hub/space/gym/gym.py
index 6af959df6a..2df488c060 100644
--- a/skdecide/hub/space/gym/gym.py
+++ b/skdecide/hub/space/gym/gym.py
@@ -510,3 +510,49 @@ def to_unwrapped(self, sample_n: Iterable[T]) -> Iterable[dict]:
     def from_unwrapped(self, sample_n: Iterable[dict]) -> Iterable[T]:
         # TODO: convert to simple types (get rid of ndarray created by gym dict space...)?
         return [self._data_class(**sample) for sample in sample_n]
+
+
+class VariableSpace(GymSpace[T]):
+
+    """This class wraps a gymnasium Space (gym.spaces.Space) to allow dynamic length of elements."""
+
+    def __init__(
+        self,
+        space: gym.Space,
+        max_len: int,
+        **kwargs,
+    ):
+        self._gym_space = space
+        self.max_len = max_len
+        self.size = (self.max_len, self._gym_space._shape[0])
+
+    def sample(self):
+        length = self.max_len
+        return list(np.array(self._gym_space.sample()) for _ in range(length))
+
+    def unwrapped(self):
+        return gym.spaces.Box(
+            low=self._gym_space.low.min(),
+            high=self._gym_space.high.max(),
+            shape=self.size,
+        )
+
+    def to_unwrapped(self, sample_n: Iterable[T]) -> Iterable:
+        return [
+            np.pad(
+                np.array(v),
+                ((0, self.max_len - len(v)), (0, 0)),
+                mode="constant",
+                constant_values=0,
+            )
+            for v in sample_n
+        ]
+
+    def from_unwrapped(self, sample_n: Iterable) -> Iterable[T]:
+        return [
+            np.array(ligne)
+            for ligne in [row for row in sample_n if not np.all(row == 0)]
+        ]
+
+    def __repr__(self):
+        return f"VariableSpace({self._gym_space}, max_len={self.max_len})"
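
Usage note (illustrative sketch, not applied by this patch): a minimal example of how the new VariableSpace added in skdecide/hub/space/gym/gym.py is expected to round-trip a variable-length state, assuming a raw gymnasium Box is passed as the per-fluent space. The per-fluent width, max_len and the fluent vectors below are made-up values chosen only for illustration.

    import gymnasium as gym
    import numpy as np

    from skdecide.hub.space.gym.gym import VariableSpace

    # Per the UPDomain "variable" encoding, each fluent becomes a fixed-width
    # vector [fluent_id, object_indices..., value]; width 4 here is arbitrary.
    per_fluent = gym.spaces.Box(low=-1, high=1000000, shape=(4,), dtype=np.int32)
    space = VariableSpace(per_fluent, max_len=6)

    # A state holding only two fluent vectors (hypothetical values).
    state = [np.array([1, 0, -1, 1]), np.array([2, 0, 1, 5])]

    padded = space.to_unwrapped([state])[0]   # (6, 4) array, zero rows appended as padding
    restored = space.from_unwrapped(padded)   # all-zero padding rows stripped again
    assert len(restored) == 2

to_unwrapped pads each sample with all-zero rows up to max_len so that fixed-shape RL backends (e.g. RayRLlib in Example 3 above) can consume the observation, and from_unwrapped drops those all-zero rows to recover the variable-length list of fluent vectors; since fluent ids start at 1, a genuine fluent row is never all zeros.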