Skip to content

Commit

Permalink
Merge pull request #89 from wbap/develop
Browse files (browse the repository at this point in the history)
Post Hackathon Project
  • Loading branch information
naoyuki-sakai authored Dec 8, 2017
2 parents 743a3a2 + 5c6fbbf commit 13f9873
Show file tree
Hide file tree
Showing 57 changed files with 1,666 additions and 540 deletions.
2 changes: 1 addition & 1 deletion agent/cognitive/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def check_grounding(self):
mod_name = v[0]
class_name = v[1]
try:
mod = __import__(mod_name, globals(), locals(), [class_name], -1)
mod = __import__(mod_name, globals(), locals(), [class_name], 0)
Klass = getattr(mod, class_name)
component_instance = Klass()
except:
Expand Down
4 changes: 2 additions & 2 deletions agent/cognitive/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ def set_model(self, feature_extractor):
def load_model(self, cnn_feature_extractor):
if os.path.exists(cnn_feature_extractor):
app_logger.info("loading... {}".format(cnn_feature_extractor))
self.feature_extractor = pickle.load(open(cnn_feature_extractor))
self.feature_extractor = pickle.load(open(cnn_feature_extractor, 'rb'))
app_logger.info("done")
else:
self.feature_extractor = CnnFeatureExtractor(self.use_gpu, self.model, self.model_type,
self.image_feature_dim)
pickle.dump(self.feature_extractor, open(cnn_feature_extractor, 'w'))
pickle.dump(self.feature_extractor, open(cnn_feature_extractor, 'wb'))
app_logger.info("pickle.dump finished")

def fire(self):
Expand Down
2 changes: 1 addition & 1 deletion agent/cognitive/service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8

import interpreter
from cognitive import interpreter
import brica1
import numpy as np

Expand Down
7 changes: 4 additions & 3 deletions agent/ml/cnn_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def __init__(self, gpu, model, model_type, out_dim):
self.mean_image = mean_image[:, start:stop, start:stop].copy()

def predict(self, x):
y, = self.func(inputs={'data': x}, outputs=[self.outname], train=False)
return y
with chainer.using_config('train', False), chainer.no_backprop_mode():
y, = self.func(inputs={'data': x}, outputs=[self.outname])
return y

def __image_feature(self, camera_image):
x_batch = np.ndarray((self.batchsize, 3, self.in_size, self.in_size), dtype=np.float32)
Expand All @@ -62,7 +63,7 @@ def __image_feature(self, camera_image):
if self.gpu >= 0:
x_data = cuda.to_gpu(x_data)

x = chainer.Variable(x_data, volatile=True)
x = chainer.Variable(x_data)
feature = self.predict(x)

if self.gpu >= 0:
Expand Down
3 changes: 2 additions & 1 deletion agent/ml/experience.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import numpy as np
from chainer import cuda
from builtins import range


class Experience:
Expand Down Expand Up @@ -50,7 +51,7 @@ def replay(self, time):
r_replay = np.ndarray(shape=(self.replay_size, 1), dtype=np.float32)
s_dash_replay = np.ndarray(shape=(self.replay_size, self.hist_size, self.dim), dtype=np.float32)
episode_end_replay = np.ndarray(shape=(self.replay_size, 1), dtype=np.bool)
for i in xrange(self.replay_size):
for i in range(self.replay_size):
s_replay[i] = np.asarray(self.d[0][replay_index[i]], dtype=np.float32)
a_replay[i] = self.d[1][replay_index[i]]
r_replay[i] = self.d[2][replay_index[i]]
Expand Down
17 changes: 10 additions & 7 deletions agent/ml/q_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import copy
import numpy as np
from chainer import cuda, FunctionSet, Variable, optimizers
from chainer import cuda, Chain, Variable, optimizers
import chainer.functions as F
from chainer import links as L
from chainer import initializers as I
from builtins import range

from config.log import APP_KEY
import logging
Expand Down Expand Up @@ -32,9 +35,9 @@ def __init__(self, use_gpu, enable_controller, dim, epsilon, epsilon_delta, min_
app_logger.info("Initializing Q-Network...")

hidden_dim = 256
self.model = FunctionSet(
l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
q_value=F.Linear(hidden_dim, self.num_of_actions,
self.model = Chain(
l4=L.Linear(self.dim*self.hist_size, hidden_dim, initialW=I.HeNormal()),
q_value=L.Linear(hidden_dim, self.num_of_actions,
initialW=np.zeros((self.num_of_actions, hidden_dim),
dtype=np.float32))
)
Expand All @@ -44,7 +47,7 @@ def __init__(self, use_gpu, enable_controller, dim, epsilon, epsilon_delta, min_
self.model_target = copy.deepcopy(self.model)

self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
self.optimizer.setup(self.model.collect_parameters())
self.optimizer.setup(self.model)

# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
Expand Down Expand Up @@ -74,7 +77,7 @@ def forward(self, state, action, reward, state_dash, episode_end):
# make new array
target = np.array(q.data, dtype=np.float32)

for i in xrange(num_of_batch):
for i in range(num_of_batch):
if not episode_end[i][0]:
tmp_ = reward[i] + self.gamma * max_q_dash[i]
else:
Expand Down Expand Up @@ -148,7 +151,7 @@ def start(self, feature):

def update_model(self, replayed_experience):
if replayed_experience[0]:
self.optimizer.zero_grads()
self.optimizer.target.cleargrads()
loss, _ = self.forward(replayed_experience[1], replayed_experience[2],
replayed_experience[3], replayed_experience[4], replayed_experience[5])
loss.backward()
Expand Down
2 changes: 1 addition & 1 deletion agent/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
chainer==1.14
chainer==2.1.0
ws4py
cherrypy
msgpack-python
Expand Down
32 changes: 17 additions & 15 deletions agent/server.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
import argparse
import cPickle as pickle
import six.moves.cPickle as pickle
import io
import os

Expand All @@ -10,6 +10,7 @@
import numpy as np
from PIL import Image
from PIL import ImageOps
from builtins import range

from cognitive import interpreter
from ml.cnn_feature_extractor import CnnFeatureExtractor
Expand All @@ -34,29 +35,29 @@ def unpack(payload, depth_image_count=1, depth_image_dim=32*32):
dat = msgpack.unpackb(payload)

image = []
for i in xrange(depth_image_count):
image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
for i in range(depth_image_count):
image.append(Image.open(io.BytesIO(bytearray(dat[b'image'][i]))))

depth = []
for i in xrange(depth_image_count):
d = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
for i in range(depth_image_count):
d = (Image.open(io.BytesIO(bytearray(dat[b'depth'][i]))))
depth.append(np.array(ImageOps.grayscale(d)).reshape(depth_image_dim))

reward = dat['reward']
reward = dat[b'reward']
observation = {"image": image, "depth": depth}
rotation = dat['rotation']
movement = dat['movement']
rotation = dat[b'rotation']
movement = dat[b'movement']

return reward, observation, rotation, movement


def unpack_reset(payload):
dat = msgpack.unpackb(payload)
reward = dat['reward']
success = dat['success']
failure = dat['failure']
elapsed = dat['elapsed']
finished = dat['finished']
reward = dat[b'reward']
success = dat[b'success']
failure = dat[b'failure']
elapsed = dat[b'elapsed']
finished = dat[b'finished']

return reward, success, failure, elapsed, finished

Expand All @@ -72,11 +73,11 @@ class Root(object):
def __init__(self, **kwargs):
if os.path.exists(CNN_FEATURE_EXTRACTOR):
app_logger.info("loading... {}".format(CNN_FEATURE_EXTRACTOR))
self.feature_extractor = pickle.load(open(CNN_FEATURE_EXTRACTOR))
self.feature_extractor = pickle.load(open(CNN_FEATURE_EXTRACTOR, 'rb'))
app_logger.info("done")
else:
self.feature_extractor = CnnFeatureExtractor(use_gpu, CAFFE_MODEL, MODEL_TYPE, image_feature_dim)
pickle.dump(self.feature_extractor, open(CNN_FEATURE_EXTRACTOR, 'w'))
pickle.dump(self.feature_extractor, open(CNN_FEATURE_EXTRACTOR, 'wb'))
app_logger.info("pickle.dump finished")

self.agent_service = AgentService(BRICA_CONFIG_FILE, self.feature_extractor)
Expand All @@ -95,6 +96,7 @@ def create(self, identifier):
feature = self.feature_extractor.feature(observation)
self.result_logger.initialize()
result = self.agent_service.create(reward, feature, identifier)
self.result_logger.add_agent(self.agent_service.agents[identifier])

outbound_logger.info('action: {}'.format(result))

Expand Down
15 changes: 11 additions & 4 deletions agent/tool/result_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from config.log import TASK_RESULT_KEY, EPISODE_RESULT_KEY
import logging
import time
from sets import Set

episode_result_logger = logging.getLogger(EPISODE_RESULT_KEY)
task_result_logger = logging.getLogger(TASK_RESULT_KEY)
Expand All @@ -15,22 +16,28 @@ def __init__(self):
self.episode = 0
self.task = 1

episode_result_logger.info("task,episode,step,time")
task_result_logger.info("task,success,failure")
episode_result_logger.info("task,episode,step,time,agents")
task_result_logger.info("task,success,failure,agents")

self.agents = Set()

def initialize(self):
self.start_time = time.time()
self.steps = 0
self.episode += 1

def add_agent(self, agent):
self.agents.add(agent)

def step(self):
self.steps += 1

def report(self, success, failure, finished):
agent_ids = ':'.join([hex(id(agent)) for agent in self.agents])
elapsed_time = time.time() - self.start_time
if finished:
task_result_logger.info('{}, {}, {}'.format(self.task, success, failure))
task_result_logger.info('{}, {}, {}, {}'.format(self.task, success, failure, agent_ids))
self.task += 1
self.episode = 0

episode_result_logger.info('{}, {}, {}, {}'.format(self.task, self.episode, self.steps, elapsed_time))
episode_result_logger.info('{}, {}, {}, {}, {}'.format(self.task, self.episode, self.steps, elapsed_time, agent_ids))
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ private static bool DisableValidate()
private static BuildTargetGroup[] buildTargetGroups = new BuildTargetGroup[]
{
BuildTargetGroup.Standalone,
BuildTargetGroup.WebPlayer,
BuildTargetGroup.Android,
BuildTargetGroup.iOS,
BuildTargetGroup.WP8,
Expand Down
Loading

0 comments on commit 13f9873

Please sign in to comment.