Commit

Fix InitialStateValueEstimationEvaluator

takuseno committed Jul 21, 2023
1 parent cc535ff commit c27f0a4

Showing 2 changed files with 261 additions and 2 deletions.
5 changes: 3 additions & 2 deletions d3rlpy/metrics/evaluators.py
@@ -263,8 +263,9 @@ def __call__(
                episode, WINDOW_SIZE, dataset.transition_picker
            ):
                # estimate action-value in initial states
-               actions = algo.predict([batch.observations[0]])
-               values = algo.predict_value([batch.observations[0]], actions)
+               first_obs = np.expand_dims(batch.observations[0], axis=0)
+               actions = algo.predict(first_obs)
+               values = algo.predict_value(first_obs, actions)
                total_values.append(values[0])
        return float(np.mean(total_values))

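The fix replaces the implicit Python-list wrapping of each episode's first observation with an explicit np.expand_dims call, so that predict and predict_value receive a properly batched NumPy array. A minimal sketch of the shape effect, using an illustrative 100-dimensional observation (the same shape the new tests parametrize):

import numpy as np

# a single observation, e.g. shape (100,) as in the tests below (illustrative)
obs = np.random.random(100).astype(np.float32)

# before: a Python list holding one observation, relying on implicit conversion
batched_via_list = np.asarray([obs])     # shape (1, 100) only for plain ndarrays

# after: the batch dimension is added explicitly up front
first_obs = np.expand_dims(obs, axis=0)  # shape (1, 100)
assert first_obs.shape == (1, 100)
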
258 changes: 258 additions & 0 deletions tests/metrics/test_evaluators.py
@@ -3,6 +3,7 @@
import numpy as np
import pytest

from d3rlpy.algos import DQNConfig, SACConfig
from d3rlpy.dataset import (
BasicTransitionPicker,
Episode,
@@ -29,6 +30,8 @@
RewardScaler,
)

from ..testing_utils import create_episode


def _convert_episode_to_batch(episode: Episode) -> TransitionMiniBatch:
transition_picker = BasicTransitionPicker()
@@ -153,6 +156,39 @@ def test_td_error_scorer(
assert np.allclose(score, np.mean(ref_errors))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_td_error_scorer_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
TDErrorEvaluator()(dqn, discrete_replay_buffer)

# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
TDErrorEvaluator()(sac, replay_buffer)


def ref_discounted_sum_of_advantage_score(
predict_value: Callable[[Observation, np.ndarray], np.ndarray],
observations: Observation,
@@ -222,6 +258,39 @@ def test_discounted_sum_of_advantage_scorer(
assert np.allclose(score, np.mean(ref_sums))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_discounted_sum_of_advantage_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
DiscountedSumOfAdvantageEvaluator()(dqn, discrete_replay_buffer)

# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
DiscountedSumOfAdvantageEvaluator()(sac, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -262,6 +331,39 @@ def test_average_value_estimation_scorer(
assert np.allclose(score, np.mean(total_values))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_average_value_estimation_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
AverageValueEstimationEvaluator()(dqn, discrete_replay_buffer)

# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
AverageValueEstimationEvaluator()(sac, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -302,6 +404,39 @@ def test_initial_state_value_estimation_scorer(
assert np.allclose(score, np.mean(total_values))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_initial_state_value_estimation_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
InitialStateValueEstimationEvaluator()(dqn, discrete_replay_buffer)

# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
InitialStateValueEstimationEvaluator()(sac, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -345,6 +480,41 @@ def test_soft_opc_scorer(
assert np.allclose(score, np.mean(success_values) - np.mean(all_values))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
@pytest.mark.parametrize("threshold", [5.0])
def test_soft_opc_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
threshold: float,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
SoftOPCEvaluator(threshold)(dqn, discrete_replay_buffer)

# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
SoftOPCEvaluator(threshold)(sac, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -384,6 +554,27 @@ def test_continuous_action_diff_scorer(
assert np.allclose(score, np.mean(total_diffs))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_continuous_action_diff_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac = SACConfig().create()
sac.build_with_dataset(replay_buffer)
ContinuousActionDiffEvaluator()(sac, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -423,6 +614,27 @@ def test_discrete_action_match_scorer(
assert np.allclose(score, np.mean(total_matches))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_discrete_action_match_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn = DQNConfig().create()
dqn.build_with_dataset(discrete_replay_buffer)
DiscreteActionMatchEvaluator()(dqn, discrete_replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -465,6 +677,29 @@ def test_compare_continuous_action_diff(
assert np.allclose(score, np.mean(total_diffs))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_compare_continuous_action_diff_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with SAC
episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=False,
)
replay_buffer = _create_replay_buffer([episode])
sac1 = SACConfig().create()
sac1.build_with_dataset(replay_buffer)
sac2 = SACConfig().create()
sac2.build_with_dataset(replay_buffer)
CompareContinuousActionDiffEvaluator(sac1)(sac2, replay_buffer)


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("n_episodes", [100])
@@ -505,3 +740,26 @@ def test_compare_discrete_action_diff(
algo, _create_replay_buffer(episodes)
)
assert np.allclose(score, np.mean(total_matches))


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("episode_length", [10])
def test_compare_discrete_action_diff_with_algos(
observation_shape: Sequence[int],
action_size: int,
episode_length: int,
) -> None:
# test with DQN
discrete_episode = create_episode(
observation_shape,
action_size,
length=episode_length,
discrete_action=True,
)
discrete_replay_buffer = _create_replay_buffer([discrete_episode])
dqn1 = DQNConfig().create()
dqn1.build_with_dataset(discrete_replay_buffer)
dqn2 = DQNConfig().create()
dqn2.build_with_dataset(discrete_replay_buffer)
CompareDiscreteActionMatchEvaluator(dqn1)(dqn2, discrete_replay_buffer)
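
Every new test follows the same smoke-test recipe: create a synthetic episode, wrap it in a replay buffer, build an algorithm from its config against that dataset, and call the evaluator directly. The same call pattern works outside the test suite; a minimal sketch, assuming a ReplayBuffer named buffer has already been built from logged episodes (the helper function below is illustrative, not part of d3rlpy):

from d3rlpy.algos import DQNConfig
from d3rlpy.metrics.evaluators import (
    AverageValueEstimationEvaluator,
    InitialStateValueEstimationEvaluator,
)

def evaluate_offline(buffer):
    # build a DQN against the dataset, exactly as the tests above do
    dqn = DQNConfig().create()
    dqn.build_with_dataset(buffer)

    # each evaluator is a callable taking (algo, replay_buffer) and returning a float
    return {
        "initial_state_value": InitialStateValueEstimationEvaluator()(dqn, buffer),
        "average_value": AverageValueEstimationEvaluator()(dqn, buffer),
    }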
