# GoogLeNet (BVLC reference model) for Lasagne.
#
# Model from the paper "Going Deeper with Convolutions".
# Pretrained Caffe weights:
#   https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
# The previous revision of this file listed their license as "unrestricted use".
#
# The converted parameters are stored next to this module as googlenet.pkl
# (tracked with Git LFS; pointer: sha256
# 8379b4d252977c628cbd22a7329db015d3af9298b8b9f07ecf87d790414b00b3,
# 79992351 bytes).
#
# Importing this module gives you `googlenet(batch_shape)`.
# Running it as a script converts the Caffe weights to googlenet.pkl and
# compares the Lasagne output against Caffe on random input.

import numpy as np
from lasagne.layers import dnn
import lasagne as nn
import lasagne
import utils
import theano
import theano.tensor as T
from lasagne.layers.normalization import LocalResponseNormalization2DLayer
from lasagne.layers.base import Layer
import os
import pickle
import time

# Location of the converted parameters, next to this module.
_WEIGHTS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'googlenet.pkl')

# Inception modules in network order, and the parametrised branches of each
# module in the order in which their parameters are exported from Caffe.
_INCEPTION_MODULES = (
    'inception_3a', 'inception_3b',
    'inception_4a', 'inception_4b', 'inception_4c', 'inception_4d', 'inception_4e',
    'inception_5a', 'inception_5b',
)
_INCEPTION_BRANCHES = (
    '/1x1', '/3x3_reduce', '/3x3', '/5x5_reduce', '/5x5', '/pool_proj',
)


if __name__ == "__main__":
    # Conversion mode: Caffe is only required when this file is run directly.
    import caffe

    net = caffe.Classifier('data/googlenet.prototxt', 'data/bvlc_googlenet.caffemodel')

    def get_caffe_params(l_name):
        """Return a struct with the `filter` and `bias` arrays of Caffe layer `l_name`."""
        layer_params = np.array(net.params[l_name])
        weights = caffe.io.blobproto_to_array(layer_params[0])
        bias = caffe.io.blobproto_to_array(layer_params[1])
        return utils.struct(
            filter=weights,
            bias=bias
        )

    def get_pretrained_params():
        """
        Collect the Caffe parameters as a flat list [W, b, W, b, ...].

        The list is handed to `set_all_param_values`, which assigns values
        positionally, so the layer order below has to match the order in which
        Lasagne enumerates the parameters of the network built by `googlenet`.
        """
        names = ['conv1/7x7_s2', 'conv2/3x3_reduce', 'conv2/3x3']
        for module in _INCEPTION_MODULES:
            names.extend(module + branch for branch in _INCEPTION_BRANCHES)
        names.append('loss3/classifier')

        values = []
        for name in names:
            params = get_caffe_params(name)  # fetch once, use both arrays
            values.append(params.filter)
            values.append(params.bias.reshape((-1,)))
        return values

    class flip(Layer):
        """Layer that mirrors both spatial axes of a 4D input; the shape is unchanged."""

        def get_output_shape_for(self, input_shape):
            return input_shape

        def get_output_for(self, input, **kwargs):
            return input[:, :, ::-1, ::-1]

else:
    # Flipping is only needed to reproduce Caffe's output exactly, which is
    # what the conversion script checks. For normal use it is a no-op;
    # override it if you need it.
    def flip(x):
        return x


def _conv(incoming, num_filters, filter_size, pad, name, stride=(1, 1),
          nonlinearity=lasagne.nonlinearities.rectify):
    """Square cuDNN convolution; ReLU unless another nonlinearity is given."""
    return dnn.Conv2DDNNLayer(
        incoming,
        num_filters=num_filters,
        filter_size=(filter_size, filter_size),
        pad=pad,
        stride=stride,
        nonlinearity=nonlinearity,
        name=name,
    )


def _max_pool(incoming, stride, name):
    """3x3 max pooling with padding 1, wrapped in `flip` on both sides."""
    return flip(dnn.MaxPool2DDNNLayer(
        flip(incoming),
        pool_size=(3, 3),
        stride=(stride, stride),
        pad=(1, 1),
        name=name,
    ))


def _lrn(incoming, name):
    """Local response normalisation with the constants used throughout this network."""
    return LocalResponseNormalization2DLayer(
        incoming,
        alpha=0.0001 / 5,
        beta=0.75,
        k=1,
        n=5,
        name=name,
    )


def _inception(layer, name, no_1x1=64, no_3x3r=96, no_3x3=128, no_5x5r=16, no_5x5=32, no_pool=32):
    """
    Build one inception module on top of `layer`.

    Four parallel branches are concatenated: 1x1 conv, 1x1 -> 3x3 conv,
    1x1 -> 5x5 conv, and 3x3 max pool -> 1x1 conv.
    :param layer: the incoming layer.
    :param name: prefix for the layer names, e.g. 'inception_3a'.
    :param no_1x1, no_3x3r, no_3x3, no_5x5r, no_5x5, no_pool: number of filters
        of the respective convolution.
    :return: the concatenation layer.
    """
    branch_1x1 = _conv(layer, no_1x1, 1, pad=0, name=name + '/1x1')

    reduce_3x3 = _conv(layer, no_3x3r, 1, pad=0, name=name + '/3x3_reduce')
    branch_3x3 = _conv(reduce_3x3, no_3x3, 3, pad=1, name=name + '/3x3')

    reduce_5x5 = _conv(layer, no_5x5r, 1, pad=0, name=name + '/5x5_reduce')
    branch_5x5 = _conv(reduce_5x5, no_5x5, 5, pad=2, name=name + '/5x5')

    pooled = _max_pool(layer, 1, name + '/pool')
    branch_pool = _conv(pooled, no_pool, 1, pad=0, name=name + '/pool_proj')

    # The branch order fixes the parameter order seen by
    # get/set_all_param_values; do not reorder.
    return nn.layers.concat([branch_1x1, branch_3x3, branch_5x5, branch_pool])


def googlenet(batch_shape):
    """
    Create a googlenet, with the parameters from https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
    If googlenet.pkl exists next to this file, its parameter values are loaded into the network.
    :param batch_shape: The shape of the input images. This should be of size (N, 3, 224, 224)
    :return: a struct with the input layer (`input`), the logit layer before the final softmax (`logit`),
    the output layer (`out`) and the output of inception module 4c (`interesting`).
    """
    l_in = lasagne.layers.InputLayer(
        shape=batch_shape,
        name='input',
    )

    # Stem: conv -> pool -> LRN -> conv -> conv -> LRN -> pool.
    l = _conv(l_in, 64, 7, pad=3, stride=(2, 2), name='conv1/7x7_s2')
    l = _max_pool(l, 2, 'pool1/3x3_s2')
    l = _lrn(l, 'pool1/norm1')
    l = _conv(l, 64, 1, pad=0, name='conv2/3x3_reduce')
    l = _conv(l, 192, 3, pad=1, name='conv2/3x3')
    l = _lrn(l, 'conv2/norm2')
    l = _max_pool(l, 2, 'pool2/3x3_s2')

    l = _inception(l, 'inception_3a', no_1x1=64, no_3x3r=96, no_3x3=128, no_5x5r=16, no_5x5=32, no_pool=32)
    l = _inception(l, 'inception_3b', no_1x1=128, no_3x3r=128, no_3x3=192, no_5x5r=32, no_5x5=96, no_pool=64)
    l = _max_pool(l, 2, 'pool3/3x3_s2')

    l = _inception(l, 'inception_4a', no_1x1=192, no_3x3r=96, no_3x3=208, no_5x5r=16, no_5x5=48, no_pool=64)
    l = _inception(l, 'inception_4b', no_1x1=160, no_3x3r=112, no_3x3=224, no_5x5r=24, no_5x5=64, no_pool=64)
    l_inc_4c = _inception(l, 'inception_4c', no_1x1=128, no_3x3r=128, no_3x3=256, no_5x5r=24, no_5x5=64,
                          no_pool=64)
    l = _inception(l_inc_4c, 'inception_4d', no_1x1=112, no_3x3r=144, no_3x3=288, no_5x5r=32, no_5x5=64,
                   no_pool=64)
    l = _inception(l, 'inception_4e', no_1x1=256, no_3x3r=160, no_3x3=320, no_5x5r=32, no_5x5=128, no_pool=128)
    l = _max_pool(l, 2, 'pool4/3x3_s2')

    l = _inception(l, 'inception_5a', no_1x1=256, no_3x3r=160, no_3x3=320, no_5x5r=32, no_5x5=128, no_pool=128)
    l = _inception(l, 'inception_5b', no_1x1=384, no_3x3r=192, no_3x3=384, no_5x5r=48, no_5x5=128, no_pool=128)

    l_pool5 = flip(dnn.Pool2DDNNLayer(
        flip(l),
        pool_size=(7, 7),
        stride=(1, 1),
        pad=0,
        mode='average',
        name='pool5/7x7_s1'
    ))

    # The classifier is a 1x1 convolution; it receives the parameters of
    # Caffe's 'loss3/classifier' layer.
    # NOTE(review): this layer is named 'prob' although it produces the logits;
    # the name is left unchanged in case callers look layers up by name.
    l_logit = _conv(l_pool5, 1000, 1, pad=0, name='prob',
                    nonlinearity=lasagne.nonlinearities.linear)

    l_logit_flat = lasagne.layers.FlattenLayer(l_logit)
    l_out = lasagne.layers.NonlinearityLayer(l_logit_flat, nonlinearity=lasagne.nonlinearities.softmax)

    if os.path.exists(_WEIGHTS_FILE):
        # NOTE: unpickling can execute arbitrary code; only use a googlenet.pkl
        # from a source you trust.
        # Pickles must be read in binary mode.
        with open(_WEIGHTS_FILE, 'rb') as f:
            nn.layers.set_all_param_values(l_out, pickle.load(f))

    return utils.struct(
        input=l_in,
        logit=l_logit,
        out=l_out,
        interesting=l_inc_4c,
    )


if __name__ == "__main__":

    uptolayer = "prob"
    batch_shape = (10, 3, 224, 224)

    # now do the same in lasagne
    model = googlenet(batch_shape)
    for layer in nn.layers.get_all_layers(model.out):
        print("    %s %s" % (layer.name, layer.output_shape))

    nn.layers.set_all_param_values(model.out, get_pretrained_params())

    # Binary mode, and the handle is closed even if pickling fails.
    with open(_WEIGHTS_FILE, 'wb') as f:
        pickle.dump(nn.layers.get_all_param_values(model.out), f)

    x = nn.utils.shared_empty(dim=len(batch_shape))
    givens = {
        model.input.input_var: x
    }
    idx = T.lscalar('idx')

    compute_output = theano.function(
        [idx],
        nn.layers.get_output(model.out, deterministic=True),
        givens=givens,
        on_unused_input='ignore',
    )

    lasagne_time = []
    caffe_time = []
    for i in range(10):
        inp = np.random.random(batch_shape).astype('float32')

        t_lasagne = time.time()
        x.set_value(inp)
        actual_output = compute_output(0)
        lasagne_time.append(time.time() - t_lasagne)
        print("lasagne time: %s" % lasagne_time[-1])

        t_caffe = time.time()
        net.blobs['data'].data[...] = inp
        net.forward(end=uptolayer)
        goal_output = net.blobs[uptolayer].data
        caffe_time.append(time.time() - t_caffe)
        print("caffe time: %s" % caffe_time[-1])

        # Compare the two implementations on this batch.
        print("goal_output shape: %s" % (goal_output.shape,))
        print("actual_output shape: %s" % (actual_output.shape,))
        print("%s %s" % (np.max(goal_output), np.max(actual_output)))
        print("%s %s" % (np.min(goal_output), np.min(actual_output)))
        print("%s %s" % (np.sum(goal_output), np.sum(actual_output)))
        print("0 = %s atol" % np.max(abs(actual_output - goal_output)))
        print("0 = %s rtol" % np.max(abs(1.0 - actual_output / goal_output)))
        print("Is this correct? %s" % np.allclose(goal_output, actual_output, atol=1e-05))

    print("caffe %s" % np.mean(caffe_time))
    print("lasagne %s" % np.mean(lasagne_time))
# coding=utf-8
# VGG-16, 16-layer model from the paper:
# "Very Deep Convolutional Networks for Large-Scale Image Recognition"
# The previous revision of this file gave the original source as
# https://gist.github.com/ksimonyan/211839e770f7b538e2d8
# and the license as: non-commercial use only.
#
# The converted parameters are stored next to this module as vgg16.pkl
# (tracked with Git LFS; pointer: sha256
# a2675fe3c55c58faa565010232eba02a26b6dae77b817e2ae22acd378558eed9,
# 1564313465 bytes).

import numpy as np
import lasagne as nn
from lasagne.layers import dnn
from functools import partial
import os
import utils
import pickle

# 3x3 "same" convolution with ReLU: the basic building block of the network.
conv3 = partial(dnn.Conv2DDNNLayer,
                stride=(1, 1),
                border_mode="same",
                filter_size=(3, 3),
                nonlinearity=nn.nonlinearities.rectify)

# Dense layer with ReLU. Not used by vgg16() itself (the dense layers are
# implemented as convolutions); kept as a module-level name for other code.
dense = partial(nn.layers.DenseLayer,
                nonlinearity=nn.nonlinearities.rectify)

# 2x2 max pooling with stride 2.
max_pool = partial(dnn.MaxPool2DDNNLayer,
                   pool_size=(2, 2),
                   stride=(2, 2))

# Location of the converted parameters, next to this module.
_WEIGHTS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'vgg16.pkl')

# (number of filters, number of 3x3 convolutions) for each of the five stages;
# every stage is followed by a max pooling layer.
_STAGES = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))


def vgg16(batch_shape):
    """
    Create a vgg16, with the parameters from http://www.robots.ox.ac.uk/~vgg/research/very_deep/
    See googlenet.py for the method used to convert these caffe parameters to lasagne parameters.
    If vgg16.pkl exists next to this file, its parameter values are loaded into the network.
    :param batch_shape: The shape of the input images. This should be of size (N, 3, X>=224, Y>=224). Note flexible
    image size, as the last dense layers have been implemented here with convolutional layers.
    :return: a struct with the input layer, the logit layer (before the final softmax) and the output layer.
    """
    # NOTE(review): the logits are flattened before the softmax, so for inputs
    # larger than 224x224 the softmax appears to run over classes and spatial
    # positions together; confirm that this is intended.
    l_in = nn.layers.InputLayer(shape=batch_shape)
    l = l_in

    # Convolutional stages.
    for num_filters, depth in _STAGES:
        for _ in range(depth):
            l = conv3(l, num_filters=num_filters)
        l = max_pool(l)

    # The two 4096-unit dense layers, written as convolutions.
    l = dnn.Conv2DDNNLayer(l,
                           num_filters=4096,
                           stride=(1, 1),
                           border_mode="valid",
                           filter_size=(7, 7))
    l = dnn.Conv2DDNNLayer(l,
                           num_filters=4096,
                           stride=(1, 1),
                           border_mode="same",
                           filter_size=(1, 1))

    # Classifier: 1000 linear outputs per spatial position.
    l_logit = dnn.Conv2DDNNLayer(l,
                                 num_filters=1000,
                                 stride=(1, 1),
                                 border_mode="same",
                                 filter_size=(1, 1),
                                 nonlinearity=None)

    l_logit_flat = nn.layers.FlattenLayer(l_logit)
    l_dense = nn.layers.NonlinearityLayer(l_logit_flat, nonlinearity=nn.nonlinearities.softmax)

    if os.path.exists(_WEIGHTS_FILE):
        # NOTE: unpickling can execute arbitrary code; only use a vgg16.pkl
        # from a source you trust.
        # Pickles must be read in binary mode.
        with open(_WEIGHTS_FILE, 'rb') as f:
            nn.layers.set_all_param_values(l_dense, pickle.load(f))

    return utils.struct(
        input=l_in,
        logit=l_logit,
        out=l_dense
    )


if __name__ == "__main__":
    # Convert the parameters in data/vgg16.npy into vgg16.pkl.
    model = vgg16((1, 3, 224, 224))
    nn.layers.set_all_param_values(model.out, np.load("data/vgg16.npy"))
    # Binary mode, and the handle is closed even if pickling fails.
    with open(_WEIGHTS_FILE, 'wb') as f:
        pickle.dump(nn.layers.get_all_param_values(model.out), f)
# VGG-19, 19-layer model from the paper:
# "Very Deep Convolutional Networks for Large-Scale Image Recognition"
# The previous revision of this file gave the original source as
# https://gist.github.com/ksimonyan/3785162f95cd2d5fee77
# and the license as: non-commercial use only.
#
# The converted parameters are stored next to this module as vgg19.pkl
# (tracked with Git LFS; pointer: sha256
# 619780f9be54845dfa1765802fde1a287a1daeb973343165ad92e929ce6f3732,
# 1625650576 bytes).

import numpy as np
import lasagne as nn
from lasagne.layers import dnn
from functools import partial
import os
import utils
import pickle

# 3x3 "same" convolution with ReLU: the basic building block of the network.
conv3 = partial(dnn.Conv2DDNNLayer,
                stride=(1, 1),
                border_mode="same",
                filter_size=(3, 3),
                nonlinearity=nn.nonlinearities.rectify)

# Dense layer with ReLU. Not used by vgg19() itself (the dense layers are
# implemented as convolutions); kept as a module-level name for other code.
dense = partial(nn.layers.DenseLayer,
                nonlinearity=nn.nonlinearities.rectify)

# 2x2 max pooling with stride 2.
max_pool = partial(dnn.MaxPool2DDNNLayer,
                   pool_size=(2, 2),
                   stride=(2, 2))

# Location of the converted parameters, next to this module.
_WEIGHTS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'vgg19.pkl')

# (number of filters, number of 3x3 convolutions) for each of the five stages;
# every stage is followed by a max pooling layer.
_STAGES = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))


def vgg19(batch_shape):
    """
    Create a vgg19, with the parameters from http://www.robots.ox.ac.uk/~vgg/research/very_deep/
    See googlenet.py for the method used to convert these caffe parameters to lasagne parameters.
    If vgg19.pkl exists next to this file, its parameter values are loaded into the network.
    :param batch_shape: The shape of the input images. This should be of size (N, 3, X>=224, Y>=224). Note flexible
    image size, as the last dense layers have been implemented here with convolutional layers.
    :return: a struct with the input layer, the logit layer (before the final softmax) and the output layer.
    """
    # NOTE(review): the logits are flattened before the softmax, so for inputs
    # larger than 224x224 the softmax appears to run over classes and spatial
    # positions together; confirm that this is intended.
    l_in = nn.layers.InputLayer(shape=batch_shape)
    l = l_in

    # Convolutional stages.
    for num_filters, depth in _STAGES:
        for _ in range(depth):
            l = conv3(l, num_filters=num_filters)
        l = max_pool(l)

    # The two 4096-unit dense layers, written as convolutions.
    l = dnn.Conv2DDNNLayer(l,
                           num_filters=4096,
                           stride=(1, 1),
                           border_mode="valid",
                           filter_size=(7, 7))
    l = dnn.Conv2DDNNLayer(l,
                           num_filters=4096,
                           stride=(1, 1),
                           border_mode="same",
                           filter_size=(1, 1))

    # Classifier: 1000 linear outputs per spatial position.
    l_logit = dnn.Conv2DDNNLayer(l,
                                 num_filters=1000,
                                 stride=(1, 1),
                                 border_mode="same",
                                 filter_size=(1, 1),
                                 nonlinearity=None)

    l_logit_flat = nn.layers.FlattenLayer(l_logit)
    l_dense = nn.layers.NonlinearityLayer(l_logit_flat, nonlinearity=nn.nonlinearities.softmax)

    if os.path.exists(_WEIGHTS_FILE):
        # NOTE: unpickling can execute arbitrary code; only use a vgg19.pkl
        # from a source you trust.
        # Pickles must be read in binary mode.
        with open(_WEIGHTS_FILE, 'rb') as f:
            nn.layers.set_all_param_values(l_dense, pickle.load(f))

    return utils.struct(
        input=l_in,
        out=l_dense,
        logit=l_logit
    )


if __name__ == "__main__":
    # Convert the parameters in data/vgg19.npy into vgg19.pkl.
    model = vgg19((1, 3, 224, 224))
    nn.layers.set_all_param_values(model.out, np.load("data/vgg19.npy"))
    # Binary mode, and the handle is closed even if pickling fails.
    with open(_WEIGHTS_FILE, 'wb') as f:
        pickle.dump(nn.layers.get_all_param_values(model.out), f)