From 6997238f2662edfb5b2142998165a6448ae742ca Mon Sep 17 00:00:00 2001 From: Aaron Wuwf Date: Fri, 29 Apr 2016 12:53:29 +0000 Subject: [PATCH 1/4] SINGA-161 DLaaS Wrap SINGA into a Docker image, which can run in a mesos cluster Can run in training and testing modes. --- include/singa/neuralnet/neuron_layer.h | 5 +- src/neuralnet/neuron_layer/dummy.cc | 46 ++- src/neuralnet/neuron_layer/pooling.cc | 22 +- src/proto/job.proto | 12 +- thirdparty/install.sh | 16 +- tool/dlaas/main.py | 178 ++++++++++++ tool/dlaas/model.py | 34 +++ tool/dlaas/test.sh | 7 + tool/dlaas/train.sh | 7 + tool/docker/ubuntu-14.04/Dockerfile | 30 ++ tool/docker/ubuntu-14.04/build.sh | 4 + tool/python/singa/driver.i | 8 +- tool/python/singa/layer.py | 382 ++++++++++++++++++------- tool/python/singa/utils/utility.py | 10 + 14 files changed, 620 insertions(+), 141 deletions(-) create mode 100644 tool/dlaas/main.py create mode 100644 tool/dlaas/model.py create mode 100644 tool/dlaas/test.sh create mode 100644 tool/dlaas/train.sh create mode 100644 tool/docker/ubuntu-14.04/Dockerfile create mode 100644 tool/docker/ubuntu-14.04/build.sh diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h index e6f0fd5c9f..f1bd4432e4 100644 --- a/include/singa/neuralnet/neuron_layer.h +++ b/include/singa/neuralnet/neuron_layer.h @@ -112,6 +112,7 @@ class DropoutLayer : public NeuronLayer { */ Blob mask_; }; + /** * This layer is dummy and do no real work. * It is used for testing purpose only. @@ -126,7 +127,7 @@ class DummyLayer: public NeuronLayer { void Setup(const LayerProto& proto, const vector& srclayers) override; void ComputeFeature(int flag, const vector& srclayers) override; void ComputeGradient(int flag, const vector& srclayers) override; - void Feed(int batchsize, vector& data, vector& aux_data); + void Feed(vector shape, vector* data, int op); Layer* ToLayer() { return this;} private: @@ -278,7 +279,7 @@ class PoolingLayer : public NeuronLayer { int kernel_x_, pad_x_, stride_x_; int kernel_y_, pad_y_, stride_y_; int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_; - PoolingProto_PoolMethod pool_; + PoolMethod pool_; }; /** * Use book-keeping for BP following Caffe's pooling implementation diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc index 9796407561..a3bec97b38 100644 --- a/src/neuralnet/neuron_layer/dummy.cc +++ b/src/neuralnet/neuron_layer/dummy.cc @@ -78,25 +78,53 @@ void DummyLayer::ComputeGradient(int flag, const vector& srclayers) { Copy(grad_, srclayers[0]->mutable_grad(this)); } -void DummyLayer::Feed(int batchsize, vector& data, vector& aux_data){ +void DummyLayer::Feed(vector shape, vector* data, int op){ - batchsize_ = batchsize; - // input data - if (data.size() > 0) { - int size = data.size(); + //batchsize_ = batchsize; + batchsize_ = shape[0]; + // dataset + if (op == 0) { + /* + size_t hdim = 1; + for (size_t i = 1; i < shape.size(); ++i) + hdim *= shape[i]; + //data_.Reshape({batchsize, (int)hdim}); + //shape.insert(shape.begin(),batchsize); + data_.Reshape(shape); + */ + //reshape data + data_.Reshape(shape); + CHECK_EQ(data_.count(), data->size()); + + int size = data->size(); float* ptr = data_.mutable_cpu_data(); for (int i = 0; i< size; i++) { - ptr[i] = data.at(i); + ptr[i] = data->at(i); } } - // auxiliary data, e.g., label - if (aux_data.size() > 0) { + // label + else { aux_data_.resize(batchsize_); for (int i = 0; i< batchsize_; i++) { - aux_data_[i] = static_cast(aux_data.at(i)); + aux_data_[i] = static_cast(data->at(i)); } } + + return; + + /* Wenfeng's input + batchsize_ = batchsize; + shape.insert(shape.begin(),batchsize); + data_.Reshape(shape); + + int size = data_.count() / batchsize_; + CHECK_EQ(size, data->size()); + float* ptr = data_.mutable_cpu_data(); + for (int i = 0; i< size; i++) + ptr[i] = data->at(i); + return; + */ } } // namespace singa diff --git a/src/neuralnet/neuron_layer/pooling.cc b/src/neuralnet/neuron_layer/pooling.cc index 4eda2e4097..07a88d9c65 100644 --- a/src/neuralnet/neuron_layer/pooling.cc +++ b/src/neuralnet/neuron_layer/pooling.cc @@ -58,8 +58,8 @@ void PoolingLayer::Setup(const LayerProto& conf, } pool_ = conf.pooling_conf().pool(); - CHECK(pool_ == PoolingProto_PoolMethod_AVG - || pool_ == PoolingProto_PoolMethod_MAX) + CHECK(pool_ == PoolMethod::AVG + || pool_ == PoolMethod::MAX) << "Padding implemented only for average and max pooling."; const auto& srcshape = srclayers[0]->data(this).shape(); int dim = srcshape.size(); @@ -83,9 +83,9 @@ void PoolingLayer::Setup(const LayerProto& conf, void PoolingLayer::ComputeFeature(int flag, const vector& srclayers) { auto src = Tensor4(srclayers[0]->mutable_data(this)); auto data = Tensor4(&data_); - if (pool_ == PoolingProto_PoolMethod_MAX) + if (pool_ == PoolMethod::MAX) data = expr::pool(src, kernel_x_, stride_x_); - else if (pool_ == PoolingProto_PoolMethod_AVG) + else if (pool_ == PoolMethod::AVG) data = expr::pool(src, kernel_x_, stride_x_) * (1.0f / (kernel_x_ * kernel_x_)); } @@ -99,9 +99,9 @@ void PoolingLayer::ComputeGradient(int flag, const vector& srclayers) { auto gsrc = Tensor4(srclayers[0]->mutable_grad(this)); auto data = Tensor4(&data_); auto grad = Tensor4(&grad_); - if (pool_ == PoolingProto_PoolMethod_MAX) + if (pool_ == PoolMethod::MAX) gsrc = expr::unpool(src, data, grad, kernel_x_, stride_x_); - else if (pool_ == PoolingProto_PoolMethod_AVG) + else if (pool_ == PoolMethod::AVG) gsrc = expr::unpool(src, data, grad, kernel_x_, stride_x_) * (1.0f / (kernel_x_ * kernel_x_)); } @@ -111,16 +111,16 @@ void PoolingLayer::ComputeGradient(int flag, const vector& srclayers) { void CPoolingLayer::Setup(const LayerProto& conf, const vector& srclayers) { PoolingLayer::Setup(conf, srclayers); - if (pool_ == PoolingProto_PoolMethod_MAX) + if (pool_ == PoolMethod::MAX) mask_.ReshapeLike(data_); } void CPoolingLayer::ComputeFeature(int flag, const vector& srclayers) { - if (pool_ == PoolingProto_PoolMethod_MAX) + if (pool_ == PoolMethod::MAX) ForwardMaxPooling(srclayers[0]->mutable_data(this)->mutable_cpu_data(), batchsize_, channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_y_, stride_y_, stride_x_, data_.mutable_cpu_data(), mask_.mutable_cpu_data()); - else if (pool_ == PoolingProto_PoolMethod_AVG) + else if (pool_ == PoolMethod::AVG) ForwardAvgPooling(srclayers[0]->mutable_data(this)->mutable_cpu_data(), batchsize_, channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_y_, data_.mutable_cpu_data()); @@ -129,12 +129,12 @@ void CPoolingLayer::ComputeFeature(int flag, const vector& srclayers) { } void CPoolingLayer::ComputeGradient(int flag, const vector& srclayers) { - if (pool_ == PoolingProto_PoolMethod_MAX) + if (pool_ == PoolMethod::MAX) BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_, channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_y_, srclayers[0]->mutable_grad(this)->mutable_cpu_data()); - else if (pool_ == PoolingProto_PoolMethod_AVG) + else if (pool_ == PoolMethod::AVG) BackwardAvgPooling(grad_.cpu_data(), batchsize_, channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_x_, diff --git a/src/proto/job.proto b/src/proto/job.proto index b4aa9714d7..0e8ef9f9d5 100644 --- a/src/proto/job.proto +++ b/src/proto/job.proto @@ -522,15 +522,15 @@ message LRNProto { // offset optional float knorm = 34 [default = 1.0]; } - +enum PoolMethod { + MAX = 0; + AVG = 1; +} + message PoolingProto { // The kernel size (square) optional int32 kernel= 1 [default = 3]; - enum PoolMethod { - MAX = 0; - AVG = 1; - } - // The pooling method + // The pooling method optional PoolMethod pool = 30 [default = MAX]; // The padding size optional uint32 pad = 31 [default = 0]; diff --git a/thirdparty/install.sh b/thirdparty/install.sh index bc3c159934..9dcb274884 100755 --- a/thirdparty/install.sh +++ b/thirdparty/install.sh @@ -256,19 +256,19 @@ function install_protobuf() echo "install protobuf in $1"; ./configure --prefix=$1; make && make install; - #cd python; - #python setup.py build; - #python setup.py install --prefix=$1; - #cd ..; + cd python; + python setup.py build; + python setup.py install --prefix=$1; + cd ..; elif [ $# == 0 ] then echo "install protobuf in default path"; ./configure; make && sudo make install; - #cd python; - #python setup.py build; - #sudo python setup.py install; - #cd ..; + cd python; + python setup.py build; + sudo python setup.py install; + cd ..; else echo "wrong commands"; fi diff --git a/tool/dlaas/main.py b/tool/dlaas/main.py new file mode 100644 index 0000000000..3ee5b62abe --- /dev/null +++ b/tool/dlaas/main.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python + +#/************************************************************ +#* +#* Licensed to the Apache Software Foundation (ASF) under one +#* or more contributor license agreements. See the NOTICE file +#* distributed with this work for additional information +#* regarding copyright ownership. The ASF licenses this file +#* to you under the Apache License, Version 2.0 (the +#* "License"); you may not use this file except in compliance +#* with the License. You may obtain a copy of the License at +#* +#* http://www.apache.org/licenses/LICENSE-2.0 +#* +#* Unless required by applicable law or agreed to in writing, +#* software distributed under the License is distributed on an +#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +#* KIND, either express or implied. See the License for the +#* specific language governing permissions and limitations +#* under the License. +#* +#*************************************************************/ + +import os, sys +import numpy as np + +current_path_ = os.path.dirname(__file__) +singa_root_=os.path.abspath(os.path.join(current_path_,'../..')) +sys.path.append(os.path.join(singa_root_,'thirdparty','protobuf-2.6.0','python')) +sys.path.append(os.path.join(singa_root_,'tool','python')) + +from model import neuralnet, updater +from singa.driver import Driver +from singa.layer import * +from singa.model import save_model_parameter, load_model_parameter +from singa.utils.utility import swap32 + +from PIL import Image +import glob,random, shutil, time +from flask import Flask, request, redirect, url_for +from singa.utils import kvstore, imgtool +app = Flask(__name__) + +def train(batchsize,disp_freq,check_freq,train_step,workspace,checkpoint=None): + print '[Layer registration/declaration]' + # TODO change layer registration methods + d = Driver() + d.Init(sys.argv) + + print '[Start training]' + + #if need to load checkpoint + if checkpoint: + load_model_parameter(workspace+checkpoint, neuralnet, batchsize) + + for i in range(0,train_step): + + for h in range(len(neuralnet)): + #Fetch data for input layer + if neuralnet[h].layer.type==kDummy: + neuralnet[h].FetchData(batchsize) + else: + neuralnet[h].ComputeFeature() + + neuralnet[h].ComputeGradient(i+1, updater) + + if (i+1)%disp_freq == 0: + print ' Step {:>3}: '.format(i+1), + neuralnet[h].display() + + if (i+1)%check_freq == 0: + save_model_parameter(i+1, workspace, neuralnet) + + + print '[Finish training]' + + +def product(workspace,checkpoint): + + print '[Layer registration/declaration]' + # TODO change layer registration methods + d = Driver() + d.Init(sys.argv) + + load_model_parameter(workspace+checkpoint, neuralnet,1) + + app.debug = True + app.run(host='0.0.0.0', port=80) + + +@app.route("/") +def index(): + return "Hello World! This is SINGA DLAAS! Please send post request with image=file to '/predict' " + +def allowed_file(filename): + allowd_extensions_ = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif']) + return '.' in filename and \ + filename.rsplit('.', 1)[1] in allowd_extensions_ + +@app.route('/predict', methods=['POST']) +def predict(): + size_=(32,32) + pixel_length_=3*size_[0]*size_[1] + label_num_=10 + if request.method == 'POST': + file = request.files['image'] + if file and allowed_file(file.filename): + im = Image.open(file).convert("RGB") + im = imgtool.resize_to_center(im,size_) + pixel = floatVector(pixel_length_) + byteArray = imgtool.toBin(im,size_) + data = np.frombuffer(byteArray, dtype=np.uint8) + data = data.reshape(1, pixel_length_) + #dummy data Layer + shape = intVector(4) + shape[0]=1 + shape[1]=3 + shape[2]=size_[0] + shape[3]=size_[1] + + for h in range(len(neuralnet)): + #Fetch data for input layer + if neuralnet[h].is_datalayer: + if not neuralnet[h].is_label: + neuralnet[h].Feed(data,3) + else: + neuralnet[h].FetchData(1) + else: + neuralnet[h].ComputeFeature() + + #get result + #data = neuralnet[-1].get_singalayer().data(neuralnet[-1].get_singalayer()) + #prop =floatArray_frompointer(data.mutable_cpu_data()) + prop = neuralnet[-1].GetData() + print prop + result=[] + for i in range(label_num_): + result.append((i,prop[i])) + + result.sort(key=lambda tup: tup[1], reverse=True) + print result + response="" + for r in result: + response+=str(r[0])+":"+str(r[1]) + + return response + return "error" + + +if __name__=='__main__': + + if sys.argv[1]=="train": + if len(sys.argv) < 6: + print "argv should be more than 6" + exit() + if len(sys.argv) > 6: + checkpoint = sys.argv[6] + else: + checkpoint = None + #training + train( + batchsize = int(sys.argv[2]), + disp_freq = int(sys.argv[3]), + check_freq = int(sys.argv[4]), + train_step = int(sys.argv[5]), + workspace = '/workspace', + checkpoint = checkpoint, + ) + else: + if len(sys.argv) < 3: + print "argv should be more than 2" + exit() + checkpoint = sys.argv[2] + product( + workspace = '/workspace', + checkpoint = checkpoint + ) + diff --git a/tool/dlaas/model.py b/tool/dlaas/model.py new file mode 100644 index 0000000000..8030a88844 --- /dev/null +++ b/tool/dlaas/model.py @@ -0,0 +1,34 @@ +import os, sys +import numpy as np + +current_path_ = os.path.dirname(__file__) +singa_root_="/usr/src/incubator-singa" +sys.path.append(os.path.join(singa_root_,'tool','python')) + +from singa.driver import Driver +from singa.layer import * +from singa.model import * +from singa.utils.utility import swap32 + + +imageData_0=ImageData(shape=[50000,3,32,32],data_path="/workspace/data/train.bin",data_type="byte",mean_path="/workspace/data/mean.bin",mean_type="float") +labelData_0=LabelData(shape=[50000,1],label_path="/workspace/data/train.label.bin",label_type="int") +convolution_0=Convolution2D(32,5,1,2,w_std=0.0001, b_lr=2,src=[imageData_0]) +pooling_0=MaxPooling2D(pool_size=(3,3), stride=2,src=[convolution_0]) +activation_0=Activation("relu",src=[pooling_0]) +lrn_0=LRN2D(3, alpha=0.00005, beta=0.75,src=[activation_0]) +convolution_1=Convolution2D(32,5,1,2, b_lr=2,src=[lrn_0]) +activation_1=Activation("relu",src=[convolution_1]) +pooling_1=AvgPooling2D(pool_size=(3,3), stride=2,src=[activation_1]) +lrn_1=LRN2D(3, alpha=0.00005, beta=0.75,src=[pooling_1]) +convolution_2=Convolution2D(64,5,1,2, b_lr=2,src=[lrn_1]) +activation_2=Activation("relu",src=[convolution_2]) +pooling_2=AvgPooling2D(pool_size=(3,3), stride=2,src=[activation_2]) +dense_0=Dense(10, w_wd=250, b_lr=2, b_wd=0,src=[pooling_2]) +loss_0=Loss("softmaxloss",src=[dense_0,labelData_0]) +neuralnet = [imageData_0,labelData_0,convolution_0,pooling_0,activation_0,lrn_0,convolution_1,activation_1,pooling_1,lrn_1,convolution_2,activation_2,pooling_2,dense_0,loss_0] + + + +#algorithm +updater = SGD(decay=0.004, momentum=0.9, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) \ No newline at end of file diff --git a/tool/dlaas/test.sh b/tool/dlaas/test.sh new file mode 100644 index 0000000000..d285df9d53 --- /dev/null +++ b/tool/dlaas/test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +cd /workspace +wget $1 +tar zxf *.tar.gz +cp /workspace/model.py /opt/incubator-singa/tool/dlaas/ +cd /opt/incubator-singa/ +python tool/dlaas/main.py test $2 $3 diff --git a/tool/dlaas/train.sh b/tool/dlaas/train.sh new file mode 100644 index 0000000000..66cd8d84e6 --- /dev/null +++ b/tool/dlaas/train.sh @@ -0,0 +1,7 @@ +#!/bin/bash +cd /workspace +wget $1 +tar zxf *.tar.gz +cp /workspace/model.py /opt/incubator-singa/tool/dlaas/ +cd /opt/incubator-singa/ +python tool/dlaas/main.py train $2 $3 $4 $5 $6 diff --git a/tool/docker/ubuntu-14.04/Dockerfile b/tool/docker/ubuntu-14.04/Dockerfile new file mode 100644 index 0000000000..67e4662334 --- /dev/null +++ b/tool/docker/ubuntu-14.04/Dockerfile @@ -0,0 +1,30 @@ +FROM ubuntu:14.04 +MAINTAINER Aaron WWF + + +ENV PREFIX=/usr/local +ENV LIBRARY_PATH=/opt/OpenBLAS/lib:$PREFIX/lib:$LIBRARY_PATH +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$PREFIX/lib:$LD_LIBRARY_PATH +ENV CPLUS_INCLUDE_PATH=/opt/OpenBLAS/include:$PREFIX/include:$CPLUS_INCLUDE_PATH +ENV SINGA_HOME=/opt/incubator-singa +ENV PATH=/opt/OpenBLAS/bin:$SINGA_HOME/bin:$PREFIX/bin:$PATH + +RUN apt-get update && apt-get install -y git unzip make autotools-dev \ + automake autoconf wget gcc g++ libtool python2.7 python2.7-dev \ + build-essential swig python-pip libtiff5-dev libjpeg8-dev zlib1g-dev \ + libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python-tk + +RUN pip install Image flask numpy + +#RUN ln -s /usr/bin/python2.7 /usr/bin/python + +#use /bin/bash instead of /bin/sh +RUN rm /bin/sh && ln -s /bin/bash /bin/sh + +COPY . $SINGA_HOME/ + +RUN cd $SINGA_HOME && ./thirdparty/install.sh all \ + && cd ./tool/python/singa && ./generatepy.sh \ + && cd $SINGA_HOME && ./autogen.sh && ./configure --enable-python \ + && make + diff --git a/tool/docker/ubuntu-14.04/build.sh b/tool/docker/ubuntu-14.04/build.sh new file mode 100644 index 0000000000..efd622eb61 --- /dev/null +++ b/tool/docker/ubuntu-14.04/build.sh @@ -0,0 +1,4 @@ +cd ../../.. +cp tool/docker/ubuntu-14.04/Dockerfile . +docker build -t singa . + diff --git a/tool/python/singa/driver.i b/tool/python/singa/driver.i index 63f2287933..f28414e208 100644 --- a/tool/python/singa/driver.i +++ b/tool/python/singa/driver.i @@ -66,14 +66,14 @@ namespace singa{ void InitNetParams(const std::string& folder, std::vector net); void Checkpoint(int step, const std::string& folder, std::vector net); }; - class DummyLayer{ public: + /* void Setup(const singa::LayerProto& proto, const std::vector& srclayers); + */ void Setup(const std::string str, const std::vector& srclayers); - void Feed(int batchsize, std::vector& data, std::vector& aux_data); + void Feed(std::vector shape, std::vector* data, int op); singa::Layer* ToLayer(); - }; - + }; %nodefault Layer; class Layer{ public: diff --git a/tool/python/singa/layer.py b/tool/python/singa/layer.py index c9a992d765..8a0750ab45 100644 --- a/tool/python/singa/layer.py +++ b/tool/python/singa/layer.py @@ -25,7 +25,7 @@ This script includes Layer class and its subclasses that users can configure different types of layers for their model. ''' -import numpy as np +import numpy from singa.parameter import Parameter, set_param_field from singa.initializations import get_init_values from singa.utils.utility import setval, generate_name @@ -54,7 +54,8 @@ def __init__(self, **kwargs): # layer connectivity is set in Model.build() self.is_datalayer = False self.singalayer = None - self.srclayers = [] + self.srclayers = [] + # set src for Rafiki if 'src' in kwargs: @@ -63,19 +64,15 @@ def __init__(self, **kwargs): self.src = None def setup(self, srclys): - ''' Create singa::Layer and store srclayers - ''' + # create singa::Layer and store srclayers if self.singalayer == None: - self.singalayer = SingaLayer.CreateLayer( - self.layer.SerializeToString()) - self.singaSrclayerVector = layerVector(len(srclys)) + self.singalayer = SingaLayer.CreateLayer(self.layer.SerializeToString()) + self.singaSrclayerVector = layerVector(len(srclys)) for i in range(len(srclys)): self.srclayers.append(srclys[i]) self.singaSrclayerVector[i] = srclys[i].get_singalayer() # set up the layer - SingaLayer.SetupLayer(self.singalayer, - self.layer.SerializeToString(), - self.singaSrclayerVector) + SingaLayer.SetupLayer(self.singalayer, self.layer.SerializeToString(), self.singaSrclayerVector) def ComputeFeature(self, *srclys): ''' The method creates and sets up singa::Layer @@ -84,38 +81,48 @@ def ComputeFeature(self, *srclys): *srclys = (list) // a list of source layers ''' - # create singa::Layer and store srclayers + + # create singa::Layer and store srclayers if self.singalayer == None: if self.src != None: srclys = self.src - self.singalayer = SingaLayer.CreateLayer( - self.layer.SerializeToString()) - self.singaSrclayerVector = layerVector(len(srclys)) + self.singalayer = SingaLayer.CreateLayer(self.layer.SerializeToString()) + self.singaSrclayerVector = layerVector(len(srclys)) for i in range(len(srclys)): self.srclayers.append(srclys[i]) self.singaSrclayerVector[i] = srclys[i].get_singalayer() # set up the layer - SingaLayer.SetupLayer(self.singalayer, - self.layer.SerializeToString(), - self.singaSrclayerVector) + SingaLayer.SetupLayer(self.singalayer, self.layer.SerializeToString(), self.singaSrclayerVector) self.singalayer.ComputeFeature(1, self.singaSrclayerVector) - def ComputeGradient(self): + def ComputeGradient(self, step, upd=None): ''' The method creates singa::Updater and calls ComputeGradient for gradient computation then updates the parameters. + + step = (int) // a training step + upd = (object) // Updater object ''' + + # create singa::Updater + assert upd != None, 'required Updater (see model.py)' + if Layer.singaupdater == None: + Layer.singaupdater = SingaUpdater.CreateUpdater(upd.proto.SerializeToString()) + # call ComputeGradient of Singa self.singalayer.ComputeGradient(1, self.singaSrclayerVector) - def UpdateParams(self, step, upd): - ''' The method updates parameter values - ''' # update parameters singaParams = self.singalayer.GetParams() - for par in singaParams: - upd.singaupdater.Update(step, par, 1.0) + for p in singaParams: + Layer.singaupdater.Update(step, p, 1.0) + + # recursively call ComputeGradient of srclayers + #(TODO) what if there are multiple source layers??? + for sly in self.srclayers: + if sly.srclayers != None: + sly.ComputeGradient(step, upd) def GetParams(self): ''' The method gets parameter values @@ -126,12 +133,12 @@ def GetParams(self): assert len(singaParams) == 2, 'weight and bias' # for weight weight_array = floatArray_frompointer(singaParams[0].mutable_cpu_data()) - weight = [weight_array[i] for i in range(singaParams[0].size())] - weight = np.array(weight).reshape(singaParams[0].shape()) + weight = [ weight_array[i] for i in range(singaParams[0].size()) ] + weight = numpy.array(weight).reshape(singaParams[0].shape()) # for bias bias_array = floatArray_frompointer(singaParams[1].mutable_cpu_data()) - bias = [bias_array[i] for i in range(singaParams[1].size())] - bias = np.array(bias).reshape(singaParams[1].shape()[0], 1) + bias = [ bias_array[i] for i in range(singaParams[1].size()) ] + bias = numpy.array(bias).reshape(singaParams[1].shape()[0], 1) return weight, bias @@ -148,19 +155,17 @@ def SetParams(self, *params): bp.shape.append(int(params[k].shape[1])) for i in range(params[k].shape[0]): for j in range(params[k].shape[1]): - bp.data.append(params[k][i, j]) + bp.data.append(params[k][i,j]) singaParams[k].FromProto(bp.SerializeToString()) def GetData(self): - ''' The method gets layer data values - ''' blobptr = self.singalayer.data(self.singalayer) data_array = floatArray_frompointer(blobptr.mutable_cpu_data()) - data = [data_array[i] for i in range(blobptr.count())] + data = [ data_array[i] for i in range(blobptr.count()) ] return data def display(self): - debug, flag = False, 0 + debug, flag = 0, 0 print self.singalayer.ToString(debug, flag) def get_singalayer(self): @@ -169,18 +174,44 @@ def get_singalayer(self): class Dummy(object): - def __init__(self, **kwargs): - ''' Dummy layer is used for data layer to feed/fetch input data - or label information + def __init__(self, shape=[], path='', dtype='', src=[]): + ''' Dummy layer is used for data layer + shape = (list) // [# of samples, # of channels, img h, img w] + path = (string) // path to dataset ''' self.is_datalayer = True - self.srclayers = None + self.srclayers = None self.singalayer = None # create layer proto for Dummy layer kwargs = {'name':'dummy', 'type':kDummy} self.layer = Message('Layer', **kwargs).proto + + # if dataset path is not specified, skip + # otherwise, load dataset + if path == '': + return + + self.shape = shape + self.path = path + self.src = None + self.batch_index = 0 + + import numpy as np + nb_samples = shape[0] + nb_pixels = shape[1] + for i in range(len(shape)-2): + nb_pixels *= shape[i+2] + if dtype=='byte': + self.is_label = 0 + d = np.fromfile(path, dtype=np.uint8) + elif dtype=='int': + self.is_label = 1 + d = np.fromfile(path, dtype=np.int) + self.data = d.reshape(nb_samples, nb_pixels) + + def setup(self, data_shape): ''' Create and Setup singa Dummy layer called by load_model_parameter @@ -189,67 +220,216 @@ def setup(self, data_shape): setval(self.layer.dummy_conf, input=True) setval(self.layer.dummy_conf, shape=data_shape) self.singalayer = DummyLayer() - self.singalayer.Setup(self.layer.SerializeToString(), - layerVector(0)) + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + + + def FetchData(self, batchsize): + + d = self.data[self.batch_index*batchsize:(self.batch_index+1)*batchsize, :] + self.Feed(d, self.shape[1], self.is_label) + self.batch_index += 1 - def Feed(self, shape, data, aux_data): + + def Feed(self, data, nb_channel=1, is_label=0): ''' Create and Setup singa::DummyLayer for input data Insert data using Feed() ''' - batchsize = shape[0] - hdim = reduce(lambda x, y: x*y, shape[1:]) + + batchsize, hdim = data.shape datasize = batchsize * hdim + imgsize = int(numpy.sqrt(hdim/nb_channel)) + shapeVector = [batchsize, nb_channel, imgsize, imgsize] # create and setup the dummy layer if self.singalayer == None: - self.setup(shape) + setval(self.layer.dummy_conf, input=True) + setval(self.layer.dummy_conf, shape=shapeVector) + self.singalayer = DummyLayer() + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + + # feed input data + data = data.astype(numpy.float) + dataVector = floatVector(datasize) + k = 0 + for i in range(batchsize): + for j in range(hdim): + dataVector[k] = data[i,j] + k += 1 + self.singalayer.Feed(shapeVector, dataVector, is_label) + + def get_singalayer(self): + return self.singalayer.ToLayer() + +class ImageData(object): + + def __init__(self, shape=[], data_path='', data_type='byte',mean_path='',mean_type='float'): + ''' Dummy layer is used for data layer + shape = (list) // [# of samples, # of channels, img h, img w] + data_path = (string) // path to dataset + mean_path + ''' + self.is_datalayer = True + self.srclayers = None + self.singalayer = None + self.is_label = False + # create layer proto for Dummy layer + kwargs = {'name':'dummy', 'type':kDummy} + self.layer = Message('Layer', **kwargs).proto + + # if dataset path is not specified, skip + # otherwise, load dataset + if data_path == '' or mean_path=='': + return + + self.shape = shape + self.data_path = data_path + self.mean_path = mean_path + self.src = None + self.batch_index = 0 + + import numpy as np + nb_samples = shape[0] + nb_pixels = shape[1] + for i in range(len(shape)-2): + nb_pixels *= shape[i+2] + + if data_type=='byte': + d = np.fromfile(data_path, dtype=np.uint8) + elif data_type=='int': + d = np.fromfile(data_path, dtype=np.int) + self.data = d.reshape(nb_samples, nb_pixels) + + if mean_type=='float': + d = np.fromfile(mean_path, dtype=np.float32) + self.mean = d.reshape(1, nb_pixels) + + def setup(self, data_shape): + ''' Create and Setup singa Dummy layer + called by load_model_parameter + ''' + if self.singalayer == None: + setval(self.layer.dummy_conf, input=True) + setval(self.layer.dummy_conf, shape=data_shape) + self.singalayer = DummyLayer() + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + - if data is not None: - data = data.astype(np.float) - dataVector = floatVector(datasize) - for i in range(batchsize): - for j in range(hdim): - dataVector[i*hdim+j] = data[i, j] - labelVector = intVector(0) + def FetchData(self, batchsize): - if aux_data is not None: - aux_data = aux_data.astype(np.int) - labelVector = intVector(datasize) - for i in range(batchsize): - labelVector[i] = aux_data[i, 0] - dataVector = floatVector(0) + d = self.data[self.batch_index*batchsize:(self.batch_index+1)*batchsize, :] + self.Feed(d, self.shape[1]) + self.batch_index += 1 + if (self.batch_index+1)*batchsize>self.data.shape[0]: + self.batch_index=0 - self.singalayer.Feed(batchsize, dataVector, labelVector) + + + def Feed(self, data, nb_channel=1): + ''' Create and Setup singa::DummyLayer for input data + Insert data using Feed() + Need to minus the mean file + ''' + batchsize, hdim = data.shape + datasize = batchsize * hdim + imgsize = int(numpy.sqrt(hdim/nb_channel)) + shapeVector = [batchsize, nb_channel, imgsize, imgsize] + #print shapeVector + # create and setup the dummy layer + if self.singalayer == None: + setval(self.layer.dummy_conf, input=True) + setval(self.layer.dummy_conf, shape=shapeVector) + self.singalayer = DummyLayer() + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + + # feed input data and minus mean + data = data.astype(numpy.float) + dataVector = floatVector(datasize) + k = 0 + for i in range(batchsize): + for j in range(hdim): + dataVector[k] = data[i,j]-self.mean[0,j] + k += 1 + self.singalayer.Feed(shapeVector, dataVector, 0) def get_singalayer(self): return self.singalayer.ToLayer() -class ImageInput(Dummy): - ''' This class is used to feed image data - ''' - def __init__(self, width=None, height=None, nb_channel=1): - super(ImageInput, self).__init__() - self.width = width - self.height = height - self.nb_channel = nb_channel - - def Feed(self, image_data): - batchsize = image_data.shape[0] - if self.width == None or self.height == None: - hdim = image_data.shape[1] - imgsize = int(np.sqrt(hdim/self.nb_channel)) - shape = [batchsize, self.nb_channel, self.width, self.height] - Dummy.Feed(self, shape, image_data, None) - -class LabelInput(Dummy): - ''' This class is used to feed label data - ''' - def __init__(self): - super(LabelInput, self).__init__() - - def Feed(self, label_data): - Dummy.Feed(self, label_data.shape, None, label_data) +class LabelData(object): + + def __init__(self, shape=[], label_path='', label_type='int'): + ''' Dummy layer is used for label data layer + shape = (list) // [# of samples, # of channels, img h, img w] + data_path = (string) // path to dataset + mean_path + ''' + self.is_datalayer = True + self.srclayers = None + self.singalayer = None + self.is_label = True + # create layer proto for Dummy layer + kwargs = {'name':'dummy', 'type':kDummy} + self.layer = Message('Layer', **kwargs).proto + + # if dataset path is not specified, skip + # otherwise, load dataset + if label_path == '': + return + + self.shape = shape + self.label_path = label_path + self.src = None + self.batch_index = 0 + + import numpy as np + nb_samples = shape[0] + + if label_type=='int': + d = np.fromfile(label_path, dtype=np.int) + self.data = d.reshape(nb_samples, 1) + + def setup(self, data_shape): + ''' Create and Setup singa Dummy layer + called by load_model_parameter + ''' + if self.singalayer == None: + setval(self.layer.dummy_conf, input=True) + setval(self.layer.dummy_conf, shape=data_shape) + self.singalayer = DummyLayer() + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + + + def FetchData(self, batchsize): + + d = self.data[self.batch_index*batchsize:(self.batch_index+1)*batchsize, :] + self.Feed(d, self.shape[1]) + self.batch_index += 1 + if (self.batch_index+1)*batchsize>self.data.shape[0]: + self.batch_index=0 + + def Feed(self, data,nb_chanel=1): + ''' Create and Setup singa::DummyLayer for input data + Insert data using Feed() + Need to minus the mean file + ''' + batchsize = data.shape[0] + shapeVector = [batchsize, 1] + + # create and setup the dummy layer + if self.singalayer == None: + setval(self.layer.dummy_conf, input=True) + setval(self.layer.dummy_conf, shape=shapeVector) + self.singalayer = DummyLayer() + self.singalayer.Setup(self.layer.SerializeToString(), layerVector(0)) + + data = data.astype(numpy.float) + dataVector = floatVector(batchsize) + for i in range(batchsize): + dataVector[i] = data[i,0] + self.singalayer.Feed(shapeVector, dataVector, 1) + + def get_singalayer(self): + return self.singalayer.ToLayer() class Data(Layer): @@ -311,7 +491,7 @@ def __init__(self, nb_filter=0, kernel=0, stride=1, pad=0, // scale the learning rate when updating parameters. w_wd = (float) // weight decay multiplier for weight, used to // scale the weight decay when updating parameters. - b_lr = (float) // learning rate multiplier for bias + b_lr = (float) // learning rate multiplier for bias b_wd = (float) // weight decay multiplier for bias ''' @@ -321,22 +501,22 @@ def __init__(self, nb_filter=0, kernel=0, stride=1, pad=0, fields = {"num_filters":nb_filter} # for kernel if type(kernel) == int: - fields['kernel'] = kernel + fields['kernel'] = kernel else: - fields['kernel_x'] = kernel[0] - fields['kernel_y'] = kernel[1] - # for stride + fields['kernel_x'] = kernel[0] + fields['kernel_y'] = kernel[1] + # for stride if type(stride) == int: - fields['stride'] = stride + fields['stride'] = stride else: - fields['stride_x'] = stride[0] - fields['stride_y'] = stride[1] - # for pad + fields['stride_x'] = stride[0] + fields['stride_y'] = stride[1] + # for pad if type(pad) == int: - fields['pad'] = pad + fields['pad'] = pad else: - fields['pad_x'] = pad[0] - fields['pad_y'] = pad[1] + fields['pad_x'] = pad[0] + fields['pad_y'] = pad[1] setval(self.layer.convolution_conf, **fields) @@ -381,7 +561,7 @@ def __init__(self, pool_size=None, 'currently pool size should be square in Singa' super(MaxPooling2D, self).__init__(name=generate_name('pool'), type=kCPooling, **kwargs) - fields = {'pool' : PoolingProto().MAX, + fields = {'pool' : MAX, 'kernel' : pool_size[0], 'stride' : stride, 'pad' : 0 if ignore_border else 1} @@ -407,8 +587,8 @@ def __init__(self, pool_size=None, 'currently pool size should be square in Singa' super(AvgPooling2D, self).__init__(name=generate_name('pool'), type=kCPooling, **kwargs) - self.layer.pooling_conf.pool = PoolingProto().AVG - fields = {'pool' : PoolingProto().AVG, + self.layer.pooling_conf.pool = AVG + fields = {'pool' : AVG, 'kernel' : pool_size[0], 'stride' : stride, 'pad' : 0 if ignore_border else 1} @@ -450,7 +630,7 @@ def __init__(self, activation='stanh', **kwargs): activation = (string) // relu, sigmoid, tanh, stanh, softmax. ''' if activation == 'tanh': - print 'Warning: Tanh layer is not supported for CPU' + print 'Warning: Tanh layer is not supported for CPU' self.name = activation self.layer_type = kActivation @@ -468,6 +648,7 @@ def __init__(self, activation='stanh', **kwargs): self.layer.activation_conf.type = TANH # for GPU #elif activation == 'stanh': # self.layer.activation_conf.type = STANH + class Dropout(Layer): @@ -528,7 +709,7 @@ def __init__(self, output_dim=0, activation=None, // scale the learning rate when updating parameters. w_wd = (float) // weight decay multiplier for weight, used to // scale the weight decay when updating parameters. - b_lr = (float) // learning rate multiplier for bias + b_lr = (float) // learning rate multiplier for bias b_wd = (float) // weight decay multiplier for bias ''' # required @@ -570,7 +751,7 @@ def __init__(self, hid_dim=None, out_dim=0, required hid_dim = (int/list) // the number of nodes in hidden layers out_dim = (int) // the number of nodes in the top layer - optional + optional activation = (string) param_share = (bool) // to share params in encoder and decoder ''' @@ -609,8 +790,7 @@ def __init__(self, out_dim=None, w_param=None, b_param=None, self.name = kwargs['name'] if 'name' in kwargs else 'RBMVis' self.layer_type = kwargs['type'] if 'type' in kwargs else kRBMVis super(RBM, self).__init__(name=generate_name(self.name, - withnumber=False), - type=self.layer_type, **kwargs) + withnumber=False), type=self.layer_type, **kwargs) setval(self.layer.rbm_conf, hdim=self.out_dim[-1]) if self.layer_type == kRBMHid and sampling != None: if sampling == 'gaussian': diff --git a/tool/python/singa/utils/utility.py b/tool/python/singa/utils/utility.py index b88720cac0..459b593119 100644 --- a/tool/python/singa/utils/utility.py +++ b/tool/python/singa/utils/utility.py @@ -84,3 +84,13 @@ def setval(proto, **kwargs): else: setattr(proto, key, val) +def swap32(x): + return (((x << 24) & 0xFF000000) | + ((x << 8) & 0x00FF0000) | + ((x >> 8) & 0x0000FF00) | + ((x >> 24) & 0x000000FF)) + +def blob_to_numpy(blob): + '''TODO This method transform blob data to python numpy array + ''' + pass From f70db8564ce9758e106550df1722dadbac0224b3 Mon Sep 17 00:00:00 2001 From: WANG Ji Date: Mon, 2 May 2016 15:49:07 +0000 Subject: [PATCH 2/4] add python utils --- tool/python/singa/utils/imgtool.py | 345 +++++++++++++++++++++++++++++ tool/python/singa/utils/kvstore.py | 90 ++++++++ 2 files changed, 435 insertions(+) create mode 100644 tool/python/singa/utils/imgtool.py create mode 100644 tool/python/singa/utils/kvstore.py diff --git a/tool/python/singa/utils/imgtool.py b/tool/python/singa/utils/imgtool.py new file mode 100644 index 0000000000..683e3d3954 --- /dev/null +++ b/tool/python/singa/utils/imgtool.py @@ -0,0 +1,345 @@ +''' +Created on Jan 8, 2016 +@author: aaron +''' +from PIL import Image +import sys, glob, os, random, shutil, time, struct +from . import kvstore + +sys.path.append(os.path.join(os.path.dirname(__file__), '../../pb2')) +from common_pb2 import RecordProto + +#bytearray to image object +def toImg(byteArray,size): + img = Image.new("RGB",size) + pix = img.load() + area = size[0]*size[1] + red = byteArray[:area] + green = byteArray[area:area*2] + blue = byteArray[area*2:] + index=0 + for x in range(0,size[0]): + for y in range(0,size[1]): + img.putpixel((x,y), (red[index],green[index],blue[index])) + index+=1 + return img + +# image object to bytearray +def toBin(im,size): + red = [] + green = [] + blue = [] + pix = im.load() + for x in range(0,size[0]): + for y in range(0,size[1]): + pixel = pix[x,y] + red.append(pixel[0]) + green.append(pixel[1]) + blue.append(pixel[2]) + fileByteArray = bytearray(red+green+blue) + return fileByteArray + +def resize_to_center(im,size): + oldSize = im.size + #bigest center cube + data=(0,0,0,0) + if oldSize[0] < oldSize[1]: + data= (0,(oldSize[1]-oldSize[0])/2,oldSize[0],(oldSize[1]+oldSize[0])/2) + else : + data= ((oldSize[0]-oldSize[1])/2,0,(oldSize[0]+oldSize[1])/2,oldSize[1]) + newIm = im.transform(size,Image.EXTENT,data) + return newIm +#transfer, resize img. only deal with .jpg file +def transform_img( + input_folder, + output_folder, + size + ): + print "Transfer images begin at:"+time.strftime('%X %x %Z') + + #if output_folder exists, empty it, otherwise create a dir + try: + os.stat(output_folder) + for root, dirs, files in os.walk(output_folder): + for f in files: + os.unlink(os.path.join(root, f)) + for d in dirs: + shutil.rmtree(os.path.join(root, d)) + except: + os.makedirs(output_folder) + + count=0 + for root, dirs, files in os.walk(input_folder): + for d in dirs: + print "find dir:", d + os.makedirs(os.path.join(output_folder,d)) + for infile in glob.glob(os.path.join(input_folder,d,"*.jpg")): + fileName = os.path.split(infile)[-1] + name,ext = os.path.splitext(fileName) + im = Image.open(infile).convert("RGB") + newIm=resize_to_center(im,size) + newIm.save(os.path.join(output_folder,d,name+".center.jpg"), "JPEG") + count+=1 + + print "transfer end at:"+time.strftime('%X %x %Z') + print "total file number: ", count + + return count + + + +def generate_bin_data( + input_folder, + output_folder, + size , + train_num, + test_num, + validate_num, + meta_file_name="meta.txt", + train_bin_file_name="train.bin", + train_label_bin_file_name="train.label.bin", + test_bin_file_name="test.bin", + test_label_bin_file_name="test.label.bin", + validate_bin_file_name="validate.bin", + validate_label_bin_file_name="validate.label.bin", + mean_bin_file_name="mean.bin", + label_bin_file_name="label.bin", + + ): + try: + os.stat(output_folder) + except: + os.makedirs(output_folder) + + print "Generate bin start at: "+time.strftime('%X %x %Z') + meta_file = open(os.path.join(output_folder,meta_file_name), "w") + + fileList=[] + labelList= [] + label=0 #label begin from 1 + + #get all img file, the folder name is the label name + for d in os.listdir(input_folder): + if os.path.isdir(os.path.join(input_folder,d)): + labelList.append((label,d)) + for f in glob.glob(os.path.join(input_folder,d,"*.jpg")): + fileList.append((label,f)) + label += 1 + + # disorder all the files + random.shuffle(fileList) + + total = len(fileList) + print total,train_num,test_num,validate_num + assert total >= train_num+test_num+validate_num + + train_file = open(os.path.join(output_folder,train_bin_file_name),"wb") + train_label_file = open(os.path.join(output_folder,train_label_bin_file_name),"wb") + validate_file = open(os.path.join(output_folder,validate_bin_file_name),"wb") + validate_label_file = open(os.path.join(output_folder,validate_label_bin_file_name),"wb") + test_file = open(os.path.join(output_folder,test_bin_file_name),"wb") + test_label_file = open(os.path.join(output_folder,test_label_bin_file_name),"wb") + mean_file = open(os.path.join(output_folder,mean_bin_file_name),"wb") + + count=0 + trainCount=0 + validateCount=0 + testCount=0 + + # the expected image binary length + binaryLength=3*size[0]*size[1] + + meanData=[] + for i in range(0,binaryLength): + meanData.append(0.0) + + #calculate mean + for (label,f) in fileList: + + count+=1 + im =Image.open(f) + #the image size should be equal + assert im.size==size + binaryPixel=toBin(im,size) + if count <= train_num : + trainCount+=1 + train_file.write(binaryPixel) + train_label_file.write(kvstore.i2b(label)) + #only caculate train data's mean value + for i in range(binaryLength): + meanData[i]+=binaryPixel[i] + elif count <= train_num+validate_num : + validateCount+=1 + validate_label_file.write(kvstore.i2b(label)) + validate_file.write(binaryPixel) + elif count <= train_num+validate_num+test_num: + testCount+=1 + test_label_file.write(kvstore.i2b(label)) + test_file.write(binaryPixel) + else: + break + + for i in range(binaryLength): + meanData[i]/=trainCount + + meanBinary=struct.pack("%sf" % binaryLength, *meanData) + + mean_file.write(meanBinary) + mean_file.flush() + mean_file.close() + + train_file.flush() + train_file.close() + validate_file.flush() + validate_file.close() + test_file.flush() + test_file.close() + + meta_file.write("image size: "+str(size[0])+"*"+str(size[1])+"\n") + meta_file.write("total file num: "+str(count)+"\n") + meta_file.write("train file num: "+str(trainCount)+"\n") + meta_file.write("validate file num: "+str(validateCount)+"\n") + meta_file.write("test file num: "+str(testCount)+"\n") + meta_file.write("label list:[\n") + + for item in labelList: + meta_file.write("("+str(item[0])+",\""+item[1]+"\"),\n") + meta_file.write("]") + meta_file.flush() + meta_file.close() + + print "end at: "+time.strftime('%X %x %Z') + + return labelList + + +def generate_kvrecord_data( + input_folder, + output_folder, + size , + train_num, + test_num, + validate_num, + meta_file_name="meta.txt", + train_bin_file_name="train.bin", + test_bin_file_name="test.bin", + validate_bin_file_name="validate.bin", + mean_bin_file_name="mean.bin", + + ): + try: + os.stat(output_folder) + except: + os.makedirs(output_folder) + + print "Generate kvrecord start at: "+time.strftime('%X %x %Z') + meta_file = open(os.path.join(output_folder,meta_file_name), "w") + + fileList=[] + labelList= [] + label=0 #label begin from 1 + + #get all img file, the folder name is the label name + for d in os.listdir(input_folder): + if os.path.isdir(os.path.join(input_folder,d)): + labelList.append((label,d)) + for f in glob.glob(os.path.join(input_folder,d,"*.jpg")): + fileList.append((label,f)) + label += 1 + + # disorder all the files + random.shuffle(fileList) + + total = len(fileList) + print total,train_num,test_num,validate_num + assert total >= train_num+test_num+validate_num + + + trainStore = kvstore.FileStore() + trainStore.open(os.path.join(output_folder,train_bin_file_name), "create") + validateStore = kvstore.FileStore() + validateStore.open(os.path.join(output_folder,validate_bin_file_name), "create") + testStore = kvstore.FileStore() + testStore.open(os.path.join(output_folder,test_bin_file_name), "create") + + meanStore = kvstore.FileStore() + meanStore.open(os.path.join(output_folder,mean_bin_file_name), "create") + + + count=0 + trainCount=0 + validateCount=0 + testCount=0 + + # the expected image binary length + binaryLength=3*size[0]*size[1] + + meanRecord = RecordProto() + meanRecord.shape.extend([3,size[0],size[1]]) + for i in range(binaryLength): + meanRecord.data.append(0.0) + + for (label,f) in fileList: + + im =Image.open(f) + #the image size should be equal + assert im.size==size + + binaryContent=str(toBin(im,size)) + + count +=1 + record = RecordProto() + record.shape.extend([3,size[0],size[1]]) + record.label=label + record.pixel=binaryContent + + value = record.SerializeToString() + + if count <= train_num : + key = "%05d" % trainCount + trainCount+=1 + trainStore.write(key,value) + #only caculate train data's mean + for i in range(binaryLength): + meanRecord.data[i]+=ord(binaryContent[i]) + elif count <= train_num+validate_num : + key = "%05d" % validateCount + validateCount+=1 + validateStore.write(key,value) + elif count <= train_num+validate_num+test_num: + key = "%05d" % testCount + testCount+=1 + testStore.write(key,value) + else: + break + + for i in range(binaryLength): + meanRecord.data[i]/=trainCount + + meanStore.write("mean", meanRecord.SerializeToString()) + meanStore.flush() + meanStore.close() + + trainStore.flush() + trainStore.close() + validateStore.flush() + validateStore.close() + testStore.flush() + testStore.close() + + meta_file.write("image size: "+str(size[0])+"*"+str(size[1])+"\n") + meta_file.write("total file num: "+str(count)+"\n") + meta_file.write("train file num: "+str(trainCount)+"\n") + meta_file.write("validate file num: "+str(validateCount)+"\n") + meta_file.write("test file num: "+str(testCount)+"\n") + meta_file.write("label list:[\n") + + for item in labelList: + meta_file.write("("+str(item[0])+",\""+item[1]+"\"),\n") + meta_file.write("]") + meta_file.flush() + meta_file.close() + + print "end at: "+time.strftime('%X %x %Z') + + return labelList diff --git a/tool/python/singa/utils/kvstore.py b/tool/python/singa/utils/kvstore.py new file mode 100644 index 0000000000..7fe16e019d --- /dev/null +++ b/tool/python/singa/utils/kvstore.py @@ -0,0 +1,90 @@ +''' +Created on Jan 8, 2016 + +@author: aaron +''' +import struct, os + +INT_LEN=8 + +class FileStore(): + ''' + kv file store + ''' + def open(self,src_path,mode): + if mode == "create": + self._file = open(src_path,"wb") + if mode == "append": + self._file = open(src_path,"ab") + if mode == "read": + self._file = open(src_path,"rb") + return self + + def close(self): + self._file.close() + return + + def read(self): + keyLen=b2i(self._file.read(INT_LEN)) + key=str(self._file.read(keyLen)) + valueLen=b2i(self._file.read(INT_LEN)) + value=str(self._file.read(valueLen)) + return key,value + + def seekToFirst(self): + self._file.seek(0) + return + + #Don't do this + def seek(self,offset): + self._file.seek(offset) + return + + def write(self,key,value): + key_len = len(key) + value_len = len(value) + self._file.write(i2b(key_len)+key+i2b(value_len)+value) + return + + def flush(self): + self._file.flush() + return + + def __init__(self ): + + return +#integer to binary Q means long long, 8 bytes +def i2b(i): + return struct.pack(" Date: Mon, 2 May 2016 16:20:34 +0000 Subject: [PATCH 3/4] add access control header --- tool/dlaas/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tool/dlaas/main.py b/tool/dlaas/main.py index 3ee5b62abe..a61c9a1126 100644 --- a/tool/dlaas/main.py +++ b/tool/dlaas/main.py @@ -37,7 +37,7 @@ from PIL import Image import glob,random, shutil, time -from flask import Flask, request, redirect, url_for +from flask import Flask, request, redirect, url_for, Response from singa.utils import kvstore, imgtool app = Flask(__name__) @@ -141,8 +141,9 @@ def predict(): print result response="" for r in result: - response+=str(r[0])+":"+str(r[1]) - + response+=str(r[0])+":"+str(r[1])+"\n" + resp = Response(response) + resp.headers['Access-Control-Allow-Origin'] = '*' return response return "error" From d8da953ee7fd5cbea280e070655dd95f47550c00 Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 5 May 2016 11:41:03 +0000 Subject: [PATCH 4/4] resolve access control bug --- tool/dlaas/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tool/dlaas/main.py b/tool/dlaas/main.py index 3ee5b62abe..e05d176da3 100644 --- a/tool/dlaas/main.py +++ b/tool/dlaas/main.py @@ -37,7 +37,7 @@ from PIL import Image import glob,random, shutil, time -from flask import Flask, request, redirect, url_for +from flask import Flask, request, redirect, url_for, Response from singa.utils import kvstore, imgtool app = Flask(__name__) @@ -141,9 +141,10 @@ def predict(): print result response="" for r in result: - response+=str(r[0])+":"+str(r[1]) - - return response + response+=str(r[0])+":"+str(r[1])+"\n" + resp = Response(response) + resp.headers['Access-Control-Allow-Origin'] = '*' + return resp return "error"