diff --git a/autoencoder.js b/autoencoder.js index 18ac063..ca8e3be 100644 --- a/autoencoder.js +++ b/autoencoder.js @@ -24,7 +24,11 @@ Autoencoder.prototype.getOutput = function(row) { else if (row[j] > x) input.push(-1.0); } - var row = input; + + // Normalize input + var k = input.map(Math.abs).reduce(function(a, b) { return a + b;}, 0); + var row = input.map(function(x) { return x * Math.pow(k+1, -0.5); }); + for (var layer = 0; layer < this.Ws.length; layer++) { // console.log(this.Ws[layer].length + ' * ' + this.Ws[layer][0].length); // Compute row * Ws[layer] + bs[layer] diff --git a/autoencoder.py b/autoencoder.py index c18ef48..5aa56cc 100644 --- a/autoencoder.py +++ b/autoencoder.py @@ -43,8 +43,9 @@ def get_row(headers, K, data_row, splits, headers_keep=None): if x < x_split: V_row[k] = 1 - if headers_keep is not None and headers[j] not in headers_keep: - Q_row[k] = 1 + if headers_keep is not None: + if headers[j] not in headers_keep: + Q_row[k] = 1 return V_row, M_row, Q_row @@ -53,14 +54,16 @@ def build_matrices(headers, data, D, K, splits, batch_size=200): V = numpy.zeros((D, K), dtype=theano.config.floatX) M = numpy.zeros((D, K), dtype=theano.config.floatX) Q = numpy.zeros((D, K), dtype=theano.config.floatX) + k = numpy.zeros((D, ), dtype=theano.config.floatX) for i, data_row in enumerate(random.sample(data, batch_size)): # How many header should we remove n_headers_keep = random.randint(0, len(headers)) headers_keep = set(random.sample(headers, n_headers_keep)) V[i], M[i], Q[i] = get_row(headers, K, data_row, splits, headers_keep) + k[i] = len([h for h in headers if h in headers_keep and h in data_row]) - return V, M, Q + return V, M, Q, k def W_values(n_in, n_out): @@ -90,11 +93,15 @@ def get_model(Ws, bs, dropout=False): v = T.matrix('input') m = T.matrix('missing') q = T.matrix('target') + k = T.vector('normalization factor') # Set all missing/target values to 0.5 keep_mask = (1-m) * (1-q) h = keep_mask * (v * 2 - 1) # Convert to +1, -1 + # Normalize 
layer 0 + h *= (k.dimshuffle(0, 'x') + 1)** -0.5 + for l in xrange(len(Ws)): h = T.dot(h, Ws[l]) + bs[l] @@ -109,7 +116,7 @@ def get_model(Ws, bs, dropout=False): # loss = -(q * LL).sum() / q.sum() loss = -((1 - m) * LL).sum() / (1 - m).sum() - return v, m, q, output, loss + return v, m, q, k, output, loss def nesterov_updates(loss, all_params, learn_rate, momentum, weight_decay): @@ -128,14 +135,14 @@ def nesterov_updates(loss, all_params, learn_rate, momentum, weight_decay): def get_train_f(Ws, bs): learning_rate = T.scalar('learning rate') - v, m, q, output, loss = get_model(Ws, bs, dropout=False) + v, m, q, k, output, loss = get_model(Ws, bs, dropout=False) updates = nesterov_updates(loss, Ws + bs, learning_rate, 0.9, 1e-6) - return theano.function([v, m, q, learning_rate], loss, updates=updates) + return theano.function([v, m, q, k, learning_rate], loss, updates=updates) def get_pred_f(Ws, bs): - v, m, q, output, loss = get_model(Ws, bs, dropout=False) - return theano.function([v, m, q], output) + v, m, q, k, output, loss = get_model(Ws, bs, dropout=False) + return theano.function([v, m, q, k], output) def train(headers, data, header_plot_x=None, header_plot_y=None, n_hidden_layers=4, n_hidden_units=128, bins=40): @@ -153,8 +160,8 @@ def train(headers, data, header_plot_x=None, header_plot_y=None, n_hidden_layers t0 = time.time() for iter in xrange(1000000): learning_rate = 1.0 * math.exp(-(time.time() - t0) / 3600) - V, M, Q = build_matrices(headers, data, D, K, splits) - print train_f(V, M, Q, learning_rate), learning_rate + V, M, Q, k = build_matrices(headers, data, D, K, splits) + print train_f(V, M, Q, k, learning_rate), learning_rate if (iter + 1) % 10 == 0: yield {'K': K, 'bins': bins, 'splits': splits, 'headers': headers, @@ -172,7 +179,7 @@ def train(headers, data, header_plot_x=None, header_plot_y=None, n_hidden_layers data_row = {headers[j]: x_split} V, M, Q = [x.reshape((1, K)) for x in get_row(headers, K, data_row, splits)] - P = pred_f(V, M, Q) + 
P = pred_f(V, M, Q, numpy.array([1], dtype=theano.config.floatX)) xs = [] ys = []