Skip to content

Commit

Permalink
Up variable encoding (#414)
Browse files Browse the repository at this point in the history
- Add a variable space that allows for variable-length state arrays in reinforcement learning
- Update the Unified Planning domain to allow PDDL states to be represented as variable-length arrays when used in reinforcement learning
  • Loading branch information
HamdaHmida authored Oct 18, 2024
1 parent 6bd5a1f commit 57cb327
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 12 deletions.
25 changes: 25 additions & 0 deletions examples/up_native_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,28 @@
max_framerate=30,
outcome_formatter=None,
)

# Example 3: Solving the same numeric example with the variable state encoding of UPDomain

domain_factory = lambda: UPDomain(
problem,
state_encoding="variable",
action_encoding="int",
)

print("Initialise Solver ... \n")
solver = RayRLlib(
domain_factory=domain_factory,
algo_class=DQN,
train_iterations=1,
)

solver.solve()

rollout(
domain_factory(),
solver,
num_episodes=1,
max_steps=100,
outcome_formatter=None,
)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ up-enhsp = { version = ">=0.0.25", python = ">=3.10", optional = true }
up-pyperplan = { version = ">=1.1.0", python = ">=3.10", optional = true }
cartopy = { version = ">=0.22.0", python = ">=3.9", optional = true }
pygrib = [
{ version = ">=2.1.5", platform = "linux", optional = true },
{ version = "<=2.1.5", platform = "linux", optional = true },
{ version = ">=2.1.5", platform = "darwin", optional = true },
]

Expand Down
161 changes: 150 additions & 11 deletions skdecide/hub/domain/up/up.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from skdecide.core import EmptySpace, ImplicitSpace, Space, Value
from skdecide.domains import DeterministicPlanningDomain
from skdecide.hub.space.gym import ListSpace, SetSpace
from skdecide.hub.space.gym.gym import BoxSpace, DictSpace, DiscreteSpace, GymSpace
from skdecide.hub.space.gym.gym import BoxSpace, DictSpace, DiscreteSpace, VariableSpace
from skdecide.utils import logger


Expand Down Expand Up @@ -167,15 +167,19 @@ def __init__(
fluent_domains: dict[FNode, tuple[Union[int, float], Union[int, float]]] = None,
state_encoding: str = "native",
action_encoding: str = "native",
max_len: int = 2000,
max_actions: int = 20,
**simulator_params,
):
"""Initialize UPDomain.
# Parameters
problem: The Unified Planning problem (Problem) to wrap.
fluent_domains: Dictionary of min and max fluent values by fluent represented as a Unified Planning's FNode (must be provided only if get_observation_space() is used)
state_encoding: Encoding of the state (observation) which must be one of "native", "dictionary" or "vector" (warning: if action_masking is "vector" then the state automatically becomes a dictionary which separates the action masking vector from the real state as defined here)
state_encoding: Encoding of the state (observation) which must be one of "native", "dictionary", "vector" or "variable" (warning: if action_masking is "vector" then the state automatically becomes a dictionary which separates the action masking vector from the real state as defined here)
action_encoding: Encoding of the action which must be either "native" or "int"
max_len: Maximum number of fluents in the case of using variable state encoding
max_actions: Maximum number of actions in the case of using variable state encoding
simulator_params: Optional parameters to pass to the UP sequential simulator
"""
self._problem = problem
Expand Down Expand Up @@ -204,17 +208,21 @@ def __init__(
self._states_np2up = None
self._actions_up2np = None
self._actions_np2up = None
if self._state_encoding != "native":
if self._state_encoding not in ["dictionary", "vector"]:
raise RuntimeError(
"State encoding must be one of 'native', 'dictionary' or 'vector'"
)
self._init_state_encoding_()
self.max_len = max_len # used only in the variable state encoding
self.max_actions = max_actions # used only in the variable state encoding

if self._action_encoding != "native":
if self._action_encoding != "int":
raise RuntimeError("Action encoding must be either 'native' or 'int'")
self._init_action_encoding_()

if self._state_encoding != "native":
if self._state_encoding not in ["dictionary", "vector", "variable"]:
raise RuntimeError(
"State encoding must be one of 'native', 'dictionary', 'vector' or 'variable'"
)
self._init_state_encoding_()

def _init_state_encoding_(self):
def fnode_lower_bound(fn):
if fn.fluent().type.lower_bound is not None:
Expand All @@ -240,12 +248,56 @@ def fnode_upper_bound(fn):
self._fnodes_vars_ordering = []
self._states_up2np = {}
self._states_np2up = {}

if self._state_encoding == "variable":
self.objects = []
self.max_param = 0
for i, a in enumerate(self._actions_np2up):
if len(a.up_parameters) > self.max_param:
self.max_param = len(a.up_parameters)
for p in a.up_parameters:
if p not in self.objects:
self.objects.append(p)

self.n2id = {
i.name: self._problem.fluents.index(i) + 1
for i in self._problem.fluents
}
self.id2n = {
self._problem.fluents.index(i) + 1: i.name
for i in self._problem.fluents
}

self.variable_mapping = {}
self.inv_mapping = {}
self.bools = []
self.bool_val = {}
self.non_bool_val = {}

init_state = self._simulator.get_initial_state()
static_fluents = self._problem.get_static_fluents()
self._static_fluent_values = {}
ci = init_state
while ci is not None:
for fn, fv in ci._values.items():
if self._state_encoding == "variable":
if fn.fluent().type.is_bool_type():
if self.n2id[fn.fluent().name] not in self.bools:
self.bools.append(self.n2id[fn.fluent().name])
if int(fv.constant_value()) not in self.bool_val.keys():
self.bool_val[int(fv.constant_value())] = fv
else:
self.bool_val[int(fv.constant_value())] = fv
fluent = np.array([-1 for _ in range(self.max_param + 2)])
fluent[0] = self.n2id[fn.fluent().name]
fluent[-1] = int(fv.constant_value())
c = 1
for j in fn._content.args:
fluent[c] = self.objects.index(j)
c += 1

self.variable_mapping[(fn, fv)] = (fluent[:-1], fluent[-1])
self.inv_mapping[tuple(fluent[:-1])] = fn
if (
fn.fluent() not in static_fluents
and fn.fluent().name != "total-cost"
Expand Down Expand Up @@ -295,11 +347,14 @@ def fnode_upper_bound(fn):
elif fn.fluent().type.is_time_type():
raise RuntimeError("Time types not handled by UPDomain")
elif fn.fluent().name != "total-cost":
self._static_fluent_values[fn] = fv
if self._state_encoding != "variable":
self._static_fluent_values[fn] = fv
ci = ci._father

def _convert_to_skup_state_(self, state):
if self._state_encoding == "native":
if state is None:
return None
elif self._state_encoding == "native":
return state
elif self._state_encoding == "dictionary":
kstate = frozenset(state.items())
Expand Down Expand Up @@ -327,6 +382,25 @@ def _convert_to_skup_state_(self, state):
self._states_up2np[skup_state] = state
self._states_np2up[kstate] = skup_state
return skup_state
elif self._state_encoding == "variable":
values = {}
for fluent in state:
if tuple(fluent[:-1]) in self.inv_mapping.keys():
k = self.inv_mapping[tuple(fluent[:-1])]
if fluent[0] in self.bools:
values[k] = self.bool_val[fluent[-1]]
else:
values[k] = Int(int(fluent[-1]))
else:
for k in self.variable_mapping.keys():
if self.variable_mapping[k][0].all() == fluent[:-1].all():
if fluent[0] in self.bools:
values[k[0]] = self.bool_val[fluent[-1]]
else:
values[k[0]] = Int(int(fluent[-1]))

values.update(self._static_fluent_values)
return SkUPState(UPState(values))
else:
return None

Expand Down Expand Up @@ -367,6 +441,33 @@ def _convert_from_skup_state_(self, skup_state: SkUPState):
self._states_np2up[tuple(state.flatten())] = skup_state
self._states_up2np[skup_state] = state
return state
elif self._state_encoding == "variable":
state = []
try:
ci = skup_state.up_state
except:
ci = skup_state
while ci is not None:
for fn, val in ci._values.items():
if (fn, val) in self.variable_mapping.keys():
state.append(
np.append(
self.variable_mapping[(fn, val)][0],
self.variable_mapping[(fn, val)][1],
)
)
else:
fluent = np.array([-1 for _ in range(self.max_param + 2)])
fluent[0] = self.n2id[fn.fluent().name]
fluent[-1] = int(val.constant_value())
c = 1
for j in fn._content.args:
fluent[c] = self.objects.index(j)
c += 1
state.append(fluent)
self.variable_mapping[(fn, val)] = (fluent[:-1], fluent[-1])
self.inv_mapping[tuple(fluent[:-1])] = fn
return state
else:
return None

Expand Down Expand Up @@ -407,6 +508,11 @@ def _get_next_state(
next_state = SkUPState(
self._simulator.apply(state.up_state, act.up_action, act.up_parameters)
)
if (self._state_encoding == "variable") and (next_state.up_state is not None):
for fn, fv in state.up_state._values.items():
if fn not in next_state.up_state._values.keys():
next_state.up_state._values[fn] = fv

if self._total_cost is not None:
cost = (
next_state.up_state.get_value(self._total_cost).constant_value() - cost
Expand Down Expand Up @@ -478,7 +584,10 @@ def _get_action_space_(self) -> D.T_agent[Space[D.T_event]]:
self._init_action_encoding_()
self._action_space = ListSpace(self._actions_np2up)
elif self._action_encoding == "int":
self._action_space = DiscreteSpace(len(self._actions_np2up))
if self._state_encoding != "variable":
self._action_space = DiscreteSpace(len(self._actions_np2up))
else:
self._action_space = DiscreteSpace(self.max_actions)
else:
return None
return self._action_space
Expand Down Expand Up @@ -573,6 +682,36 @@ def _get_observation_space_(self) -> D.T_agent[Space[D.T_observation]]:
else np.int32
),
)
elif self._state_encoding == "variable":
self._observation_space = VariableSpace(
BoxSpace(
low=-1
if np.array(
[
self._fnodes_variables_map[fn][0]
for fn in self._fnodes_vars_ordering
]
).min()
> -1
else np.array(
[
self._fnodes_variables_map[fn][0]
for fn in self._fnodes_vars_ordering
]
).min(),
high=1000000,
shape=(self.max_param + 2,),
dtype=(
np.float32
if any(
fn.fluent().type.is_real_type()
for fn in self._fnodes_vars_ordering
)
else np.int32
),
),
max_len=self.max_len,
)
else:
return None
return self._observation_space
46 changes: 46 additions & 0 deletions skdecide/hub/space/gym/gym.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,3 +510,49 @@ def to_unwrapped(self, sample_n: Iterable[T]) -> Iterable[dict]:
def from_unwrapped(self, sample_n: Iterable[dict]) -> Iterable[T]:
# TODO: convert to simple types (get rid of ndarray created by gym dict space...)?
return [self._data_class(**sample) for sample in sample_n]


class VariableSpace(GymSpace[T]):

"""This class wraps a gymnasium Space (gym.spaces.Space) to allow dynamic length of elements."""

def __init__(
self,
space: gym.Space,
max_len: int,
**kwargs,
):
self._gym_space = space
self.max_len = max_len
self.size = (self.max_len, self._gym_space._shape[0])

def sample(self):
length = self.max_len
return list(np.array(self._gym_space.sample()) for _ in range(length))

def unwrapped(self):
return gym.spaces.Box(
low=self._gym_space.low.min(),
high=self._gym_space.high.max(),
shape=self.size,
)

def to_unwrapped(self, sample_n: Iterable[T]) -> Iterable:
return [
np.pad(
np.array(v),
((0, self.max_len - len(v)), (0, 0)),
mode="constant",
constant_values=0,
)
for v in sample_n
]

def from_unwrapped(self, sample_n: Iterable) -> Iterable[T]:
return [
np.array(ligne)
for ligne in [row for row in sample_n if not np.all(row == 0)]
]

def __repr__(self):
return f"RepeatedSpace({self._gym_space}, max_len={self.max_len})"

0 comments on commit 57cb327

Please sign in to comment.