# gym_train_multi.py
# Trains a NEAT feed-forward agent on a Gym environment (Berzerk-v0 by default),
# evaluating genomes in parallel worker processes.
import argparse
import gym
import os
import numpy as np
from skimage.measure import block_reduce
from neat_agent import look_around
from neat_agent import network
import neat
# NOTE(review): not referenced anywhere else in this file - confirm before removing.
first_genome = None
# Three-component colour constants, presumably RGB pixel values taken from the
# game frame; none of them is referenced in this file - TODO confirm usage.
PLAYER_CLR = np.array([240, 170, 103])
BLACK_CLR = np.array([0, 0, 0])
WALL_CLR = np.array([84, 92, 214])
# Weights of the three fitness terms combined at the end of eval_genome.
SCORE_FACTOR = 3  # multiplies the normalized game score
LEVEL_FACTOR = 0.5  # multiplies the extra-level counter
TIME_FACTOR = 1  # multiplies the time penalty, which is subtracted
class Agent(object):
    """Picks actions for the environment using a NEAT network."""

    def __init__(self, action_space):
        """Remember the environment's action space for random fallback moves."""
        self.action_space = action_space

    # You should modify this function
    def act(self, data, reward, done, brain):
        """Return the action to take for the current (reduced) frame.

        `data` is handed to look_around.parse_observations; its result is
        fed to `brain.activate` and the strongest output selects the move.
        `reward` and `done` are accepted but not used.
        """
        vision = look_around.parse_observations(data)
        if vision is not None:
            network_outputs = brain.activate(vision)
            return get_move(network_outputs)
        # no player object on screen: fall back to a random action
        return self.action_space.sample()
def get_move(outputs):
    """Return the index of the largest value in *outputs*.

    Ties resolve to the first (lowest) index. Unlike a fixed sentinel
    start value, this works for arbitrarily small outputs and does not
    shadow the builtin ``max``.

    Raises:
        ValueError: if *outputs* is empty.
    """
    # max() over the indices returns the first index holding the maximum.
    return max(range(len(outputs)), key=lambda i: outputs[i])
def eval_genome(genome, config):
    """Play one episode with the network built from *genome*; return its fitness.

    The environment id is read from the ``--env_id`` command-line option
    (default ``'Berzerk-v0'``). The episode ends when the environment
    reports ``done`` or when the agent has chosen an "idle" action
    (``action <= 1 or action >= 10``) ``idle_limit`` times in a row.

    Fitness = SCORE_FACTOR * normalize_score(score)
              - TIME_FACTOR * normalize_time_penalty(steps)
              + LEVEL_FACTOR * extra_levels

    Args:
        genome: the NEAT genome to evaluate.
        config: the NEAT configuration used to build the network.

    Returns:
        The fitness value (it is also printed).
    """
    brain = neat.nn.FeedForwardNetwork.create(genome, config)

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('--env_id', nargs='?', default='Berzerk-v0', help='Select the environment to run')
    args = parser.parse_args()

    # logger.set_level(logger.INFO)

    env = gym.make(args.env_id)
    try:
        env.seed(0)
        agent = Agent(env.action_space)

        reward = 0
        done = False
        score = 0

        ob = env.reset()
        data = block_reduce(ob, block_size=(2, 2, 1), func=np.max)

        idle_count = 0
        idle_limit = 100
        # Steps taken since the most recent level start.
        # NOTE(review): the time penalty therefore reflects only the last
        # level played, not the longest one - confirm this is intended.
        steps = 0
        is_level_end = True
        # The -3 starting offset is kept as-is; its rationale is not
        # visible in this file.
        extra_levels = -3

        while not done:
            # A zero in channel 0 of pixel [4][4] is treated as the
            # between-levels state; the first non-zero frame after it
            # counts as the start of a new level.
            if ob[4][4][0] == 0:
                is_level_end = True
            elif is_level_end:
                # now the start of level
                is_level_end = False
                extra_levels += 1
                steps = 0
            steps += 1

            action = agent.act(data, reward, done, brain)
            if action <= 1 or action >= 10:  # shooting and not moving
                idle_count += 1
            else:
                idle_count = 0
            if idle_count == idle_limit:
                break

            ob, reward, done, _ = env.step(action)
            # Refresh the reduced frame so the next decision is based on
            # the current observation rather than the initial one.
            data = block_reduce(ob, block_size=(2, 2, 1), func=np.max)
            score += reward
            # env.render()
    finally:
        # Close the env even if the episode raised.
        env.close()

    fitness = ((SCORE_FACTOR * normalize_score(score))
               - (TIME_FACTOR * normalize_time_penalty(steps))
               + (LEVEL_FACTOR * extra_levels))
    print(fitness)
    return fitness
def normalize_score(score):
    """Map a raw game score onto a concave curve that equals 1 at 2000 points.

    The score is raised to the power 0.3 and divided by the same power of
    the reference maximum, so early points are worth more than later ones.
    """
    max_score = 2000
    normal_factor = 0.3
    scale = 1 / pow(max_score, normal_factor)
    return scale * pow(score, normal_factor)
def normalize_time_penalty(steps):
    """Return the fitness penalty for taking *steps* steps.

    Below 250 steps there is no penalty. From 250 steps on, a notice is
    printed and 0.1 is charged for every 50 steps beyond the allowance
    (pro-rated linearly).
    """
    allowed_steps = 250
    deduction = 0.1  # deduction per step amount
    step_amount = 50
    if not steps < allowed_steps:
        print('OVER TIME LIMIT')
        penalty_per_step = deduction / step_amount
        return penalty_per_step * (steps - allowed_steps)
    return 0
def normalize_exit_distance(distance):
    """Convert a distance into a closeness value: 1 at distance 0, 0 at 85.

    Returns 0 when *distance* is None.
    """
    if distance is None:
        return 0
    board_height = 85
    normal_factor = 0.7
    scale = 1 / pow(board_height, normal_factor)
    return (-scale) * pow(distance, normal_factor) + 1
def run(config_file):
    """Evolve a population with NEAT and save the winning network.

    Loads the NEAT settings from *config_file*, evaluates genomes with
    eval_genome in 6 parallel workers for up to 10000 generations, and
    writes the winner's network to 'network_data.dat'.
    """
    # Load configuration.
    config = neat.Config(
        neat.DefaultGenome,
        neat.DefaultReproduction,
        neat.DefaultSpeciesSet,
        neat.DefaultStagnation,
        config_file,
    )
    # print(config.genome_config.compatibility_weight_coefficient)

    # Create the population, which is the top-level object for a NEAT run.
    population = neat.Population(config)
    # population = neat.Checkpointer.restore_checkpoint('checkpoints/fitness2/neat-checkpoint-195')

    # Terminal progress output, run statistics, and a checkpointer
    # constructed with interval argument 1.
    reporters = (
        neat.StdOutReporter(True),
        neat.StatisticsReporter(),
        neat.Checkpointer(1),
    )
    for reporter in reporters:
        population.add_reporter(reporter)

    # Run until goal is met
    evaluator = neat.ParallelEvaluator(6, eval_genome)
    winner = population.run(evaluator.evaluate, 10000)

    best_net = neat.nn.FeedForwardNetwork.create(winner, config)
    network.save_network(best_net, 'network_data.dat')
## YOU MAY NOT MODIFY ANYTHING BELOW THIS LINE OR USE
## ANOTHER MAIN PROGRAM
if __name__ == '__main__':
    # Determine path to configuration file. This path manipulation is
    # here so that the script will run successfully regardless of the
    # current working directory.
    local_dir = os.path.dirname(__file__)
    # The NEAT settings file 'config-feedforward' is looked up next to this script.
    config_path = os.path.join(local_dir, 'config-feedforward')
    run(config_path)