From a7962f2ec7c6252f63ec84ec9f50a747ec8e2360 Mon Sep 17 00:00:00 2001 From: Mikel Date: Wed, 26 Jun 2024 14:49:51 +0200 Subject: [PATCH] Convert DiscreteActionWrapper into BinaryActionWrapper --- craftium-docs/docs/getting-started.md | 14 ++++++++------ craftium-docs/docs/reference.md | 2 +- craftium/__init__.py | 12 ++++++------ craftium/wrappers.py | 9 ++++----- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/craftium-docs/docs/getting-started.md b/craftium-docs/docs/getting-started.md index 4305d964e..cec2714db 100644 --- a/craftium-docs/docs/getting-started.md +++ b/craftium-docs/docs/getting-started.md @@ -78,28 +78,30 @@ The rest of the parameters are optional, where the ones in the code section abov Note that `CraftiumEnv` environments define a fairly large action space with discrete and continuous values. For a complete specification on the default action space see the dedicated [page](./obs-and-acts.md#action-space). -However, many tasks don't require the complete action space and can be greatly simplified by considering only the relevant actions to solve the task at hand. For this reason, craftium comes with [`DiscreteActionWrapper`](./reference.md), that can be used to convert the default [`Dict`](https://gymnasium.farama.org/api/spaces/composite/#dict) action space into a simplified [`MultiDiscrete`](https://gymnasium.farama.org/api/spaces/fundamental/#multidiscrete) space. +However, many tasks don't require the complete action space and can be greatly simplified by considering only the relevant actions to solve the task at hand. For this reason, craftium comes with [`BinaryActionWrapper`](./reference.md), that can be used to convert the default [`Dict`](https://gymnasium.farama.org/api/spaces/composite/#dict) action space into a simplified [`MultiBinary`](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.MultiBinary) space. 
For example, ```python -env = DiscreteActionWrapper( +from craftium.wrappers import BinaryActionWrapper + +env = BinaryActionWrapper( env, actions=["forward", "mouse x+", "mouse x-"], mouse_mov=0.5, ) ``` -`DiscreteActionWrapper` takes the `CraftiumEnv` to wrap as the first argument. Then, the `actions` parameters can be used to select the set of actions from the original action space that will be available in the wrapped environment (see the section on the [action space](./obs-and-acts.md#action-space) for the list of all available action names). In this example, the wrapped environment will only have 3 discrete actions: forward, move the mouse left, and move the mouse right. The last parameter, `mouse_mov` defines the magnitude of the mouse movement (must be in the [0, 1] range). +`BinaryActionWrapper` takes the `CraftiumEnv` to wrap as the first argument. Then, the `actions` parameter can be used to select the set of actions from the original action space that will be available in the wrapped environment (see the section on the [action space](./obs-and-acts.md#action-space) for the list of all available action names). In this example, the wrapped environment will only have 3 discrete actions: forward, move the mouse left, and move the mouse right. The last parameter, `mouse_mov`, defines the magnitude of the mouse movement (must be in the [0, 1] range). 
If we print `env.action_space` before applying the wrapper, we get the following Gymnasium space: ```python Dict('aux1': Discrete(2), 'backward': Discrete(2), 'dig': Discrete(2), 'drop': Discrete(2), 'forward': Discrete(2), 'inventory': Discrete(2), 'jump': Discrete(2), 'left': Discrete(2), 'mouse': Box(-1.0, 1.0, (2,), float32), 'place': Discrete(2), 'right': Discrete(2), 'slot_1': Discrete(2), 'slot_2': Discrete(2), 'slot_3': Discrete(2), 'slot_4': Discrete(2), 'slot_5': Discrete(2), 'slot_6': Discrete(2), 'slot_7': Discrete(2), 'slot_8': Discrete(2), 'slot_9': Discrete(2), 'sneak': Discrete(2), 'zoom': Discrete(2)) ``` -After wrapping `env` with `DiscreteActionWrapper`, we get that `env.action_space` is: +After wrapping `env` with `BinaryActionWrapper`, we get that `env.action_space` is: ```python -MultiDiscrete([2 2 2]) +MultiBinary(3) ``` -Much simpler! The default action space has been reduced to a binary vector of only 3 elements. Finally, note that many of the [environments provided](./environments.md) by craftium employ `DiscreteActionWrapper` to simplify their optimization. +Much simpler! The default action space has been reduced to a binary vector of only 3 elements. Finally, note that many of the [environments provided](./environments.md) by craftium employ `BinaryActionWrapper` to simplify their optimization. diff --git a/craftium-docs/docs/reference.md b/craftium-docs/docs/reference.md index 11febe2c9..e778de248 100644 --- a/craftium-docs/docs/reference.md +++ b/craftium-docs/docs/reference.md @@ -7,4 +7,4 @@
## Wrappers -::: craftium.wrappers.DiscreteActionWrapper +::: craftium.wrappers.BinaryActionWrapper diff --git a/craftium/__init__.py b/craftium/__init__.py index 6ccf42ada..9774942e2 100644 --- a/craftium/__init__.py +++ b/craftium/__init__.py @@ -1,5 +1,5 @@ from .craftium_env import CraftiumEnv -from .wrappers import DiscreteActionWrapper +from .wrappers import BinaryActionWrapper from gymnasium.envs.registration import register, WrapperSpec @@ -25,8 +25,8 @@ entry_point="craftium.craftium_env:CraftiumEnv", additional_wrappers=[ WrapperSpec( - name="DiscreteActionWrapper", - entry_point="craftium.wrappers:DiscreteActionWrapper", + name="BinaryActionWrapper", + entry_point="craftium.wrappers:BinaryActionWrapper", kwargs=dict( actions=["forward", "mouse x+", "mouse x-"], mouse_mov=0.5, @@ -48,8 +48,8 @@ entry_point="craftium.craftium_env:CraftiumEnv", additional_wrappers=[ WrapperSpec( - name="DiscreteActionWrapper", - entry_point="craftium.wrappers:DiscreteActionWrapper", + name="BinaryActionWrapper", + entry_point="craftium.wrappers:BinaryActionWrapper", kwargs=dict( actions=["forward", "dig", "mouse x+", "mouse x-", "mouse y+", "mouse y-"], mouse_mov=0.5, @@ -61,7 +61,7 @@ env_dir=os.path.join(root_path, "craftium-envs/chop-tree"), obs_width=64, obs_height=64, - max_timesteps=2_000, + max_timesteps=500, init_frames=200, minetest_conf=dict( give_initial_stuff=True, diff --git a/craftium/wrappers.py b/craftium/wrappers.py index 4c9f2c03f..036200aaf 100644 --- a/craftium/wrappers.py +++ b/craftium/wrappers.py @@ -1,13 +1,12 @@ import numpy as np from gymnasium import ActionWrapper, Env -from gymnasium.spaces import MultiDiscrete +from gymnasium.spaces import MultiBinary from .craftium_env import ACTION_ORDER -class DiscreteActionWrapper(ActionWrapper): - """A Gymnasium `ActionWrapper` that translates craftium's `Dict` action space into a binary (discretized) action space. - Specifically into the `MultiDiscrete` space. 
+class BinaryActionWrapper(ActionWrapper): + """A Gymnasium `ActionWrapper` that translates craftium's `Dict` action space into a binary (discretized) action space [`MultiBinary`](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.MultiBinary). :param env: The environment to wrap. :param actions: A list of strings containing the names of the actions that will consititute the new action space. @@ -28,7 +27,7 @@ def __init__(self, env: Env, actions: list[str], mouse_mov: float = 0.5): f"Invalid action given. Valid actions are: {valid_actions}" # define the action space for gymnasium - self.action_space = MultiDiscrete(np.full(len(actions), 2)) + self.action_space = MultiBinary(len(actions)) # clip the mouse movement if needed self.mouse_mov = np.clip(mouse_mov, 0., 1.)