diff --git a/1-grid-world/4-q-learning/agent.py b/1-grid-world/4-q-learning/agent.py index 811b00b..daab810 100644 --- a/1-grid-world/4-q-learning/agent.py +++ b/1-grid-world/4-q-learning/agent.py @@ -9,7 +9,7 @@ def __init__(self, actions): self.actions = actions self.step_size = 0.01 self.discount_factor = 0.9 - self.epsilon = 0.9 + self.epsilon = 0.1 self.q_table = defaultdict(lambda: [0.0, 0.0, 0.0, 0.0]) # 샘플로부터 큐함수 업데이트 @@ -58,7 +58,7 @@ def arg_max(q_list): agent.learn(state, action, reward, next_state) state = next_state - + # 모든 큐함수를 화면에 표시 env.print_value_all(agent.q_table)