Skip to content

Commit

Permalink
Use knowledge of the form structure to better handle selects
Browse files Browse the repository at this point in the history
Introduces util/xform.js to centralize parsing of XForms.

Fixes #96
  • Loading branch information
mojodna committed Apr 13, 2017
1 parent e4b2b4e commit db6f813
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 106 deletions.
49 changes: 2 additions & 47 deletions api/odk/controllers/upload-form.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ const multiparty = require('multiparty');
const mv = require('mv');
const PythonShell = require('python-shell');
const tempy = require('tempy');
const { parseString } = require('xml2js');

const settings = require('../../../settings');
const { getForms, loadXForm } = require('../../../util/xform');

const formsDir = settings.dataDir + '/forms/';

/**
Expand All @@ -36,52 +37,6 @@ const xlsToXForm = (xlsPath, callback) => {
});
};

/**
 * Read an XForm XML file, parse it with xml2js and extract basic metadata.
 *
 * The form id is read from the `id` attribute of the first child element of
 * the first `instance` inside the first `model` of `h:head`.
 *
 * @param {string} xformPath - Path of the XForm XML file to load.
 * @param {Function} callback - Node-style callback `(err, xform)`. On success
 *   `xform` carries `path`, `filename`, `title`, `id` and `form` (the parsed
 *   document).
 */
const loadXForm = (xformPath, callback) => {
  return async.waterfall([
    async.apply(fs.readFile, xformPath),
    parseString
  ], (err, obj) => {
    if (err) {
      return callback(err);
    }

    let xform;

    try {
      const key = Object.keys(obj['h:html']['h:head'][0]['model'][0]['instance'][0])[0];

      xform = {
        path: xformPath,
        filename: path.basename(xformPath),
        title: obj['h:html']['h:head'][0]['h:title'][0],
        id: obj['h:html']['h:head'][0]['model'][0]['instance'][0][key][0]['$']['id'],
        form: obj
      };
    } catch (parseErr) {
      return callback(new Error(`Failed while parsing ${xformPath}: ${parseErr.message}`));
    }

    // Deliver the result outside the try block: an exception thrown by the
    // consumer must not be reported as a parse failure, and the callback must
    // never be invoked a second time.
    return callback(null, xform);
  });
};

/**
 * Load every XForm (`*.xml`, matched case-insensitively) found in `formsDir`.
 *
 * Forms that fail to load are skipped rather than reported; only a failure to
 * list the directory is passed to the callback as an error.
 *
 * @param {Function} callback - Node-style callback `(err, forms)`.
 */
const getForms = (callback) => {
  // Load one form, turning any failure into a missing entry.
  /* eslint handle-callback-err: 0 */
  const loadOrSkip = (xformPath, done) =>
    loadXForm(xformPath, (err, form) =>
      // ignore errors
      done(null, form));

  // Drop the entries left empty by forms that could not be loaded.
  const dropMissing = (loaded, done) => done(null, loaded.filter(x => x != null));

  return fs.readdir(formsDir, (err, entries) => {
    if (err) {
      return callback(err);
    }

    const xformPaths = entries
      .filter(x => x.match(/\.xml$/i))
      .map(x => path.join(formsDir, x));

    return async.waterfall([
      async.apply(async.map, xformPaths, loadOrSkip),
      dropMissing
    ], callback);
  });
};

/**
* User uploads an XLSForm (Excel ODK Form).
* XLSForms are converted to XForm with pyxform, and both
Expand Down
149 changes: 90 additions & 59 deletions api/odk/helpers/aggregate-submissions.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
'use strict';

var fs = require('fs');
var path = require('path');
const fs = require('fs');
const path = require('path');

var JSONStream = require('JSONStream');
var async = require('async');
const async = require('async');
const JSONStream = require('JSONStream');
const xpath = require('xml2js-xpath');

var settings = require('../../../settings');
const settings = require('../../../settings');
const { getFormMetadata } = require('../../../util/xform');

var ASYNC_LIMIT = 10;
const ASYNC_LIMIT = 10;

module.exports = function (opts, callback) {
var formName = opts.formName;
var limit = parseInt(opts.limit);
var offset = opts.offset;
module.exports = (opts, callback) => {
const { formName } = opts;
let { offset } = opts;
let limit = parseInt(opts.limit);

// default to 100 for limit
if (isNaN(limit) || limit < 1) {
Expand Down Expand Up @@ -46,69 +48,98 @@ module.exports = function (opts, callback) {
});
}

var dir = settings.dataDir + '/submissions/' + formName;
var aggregate = [];

// All of the submission dirs in the form directory
// Note that fs.readdir is always in alphabetical order on POSIX systems.
return fs.readdir(dir, function (err, submissionDirs) {
return getFormMetadata(formName, (err, meta) => {
if (err) {
if (err.errno === -2) {
// trying to open a directory that is not there.
// TODO pass an Error
return callback({
status: 404,
msg: 'You are trying to aggregate the ODK submissions for a form that has no submissions. Please submit at least one survey to see data. Also, check to see if you spelled the form name correctly. Form name: ' + formName,
err
});
}

// TODO pass an error
return callback({
status: 500,
msg: 'Problem reading submissions directory.',
err: err
msg: 'Could not read form metadata.',
err
});
}

// if offset, we do pagination
if (offset != null) {
submissionDirs = submissionDirs.slice(offset, offset + limit);
}
const selectFields = Object.keys(meta.fields).filter(k => meta.fields[k] === 'select');
const selectItems = selectFields.reduce((obj, k) => {
obj[k] = xpath.find(meta.form, `//h:body/select[@ref='/${meta.instanceName}/${k}']/item`)
.reduce((obj2, item) => {
obj2[item.label[0]] = item.value[0];

return async.eachLimit(submissionDirs, ASYNC_LIMIT, function (submissionDir, next) {
// If it's not a directory, we just skip processing that path.
if (submissionDir[0] === '.' || submissionDir.indexOf('.txt') > 0) {
return next(); // ok, but skipping
}
// Otherwise, we want to open up the data.json in the submission dir.
var dataFile = path.join(dir, submissionDir, 'data.json');
try {
var parser = fs.createReadStream(dataFile).pipe(JSONStream.parse());
return obj2;
}, {});

parser.on('data', data => {
aggregate.push(data);
return obj;
}, {});

return next(); // ok submission
});
const dir = settings.dataDir + '/submissions/' + formName;
const aggregate = [];

parser.on('error', err => next(err));
} catch (err) {
// TODO pass an Error
return next({
// All of the submission dirs in the form directory
// Note that fs.readdir is always in alphabetical order on POSIX systems.
return fs.readdir(dir, (err, submissionDirs) => {
if (err) {
if (err.errno === -2) {
// trying to open a directory that is not there.
// TODO pass an Error
return callback({
status: 404,
msg: 'You are trying to aggregate the ODK submissions for a form that has no submissions. Please submit at least one survey to see data. Also, check to see if you spelled the form name correctly. Form name: ' + formName,
err
});
}

// TODO pass an error
return callback({
status: 500,
msg: 'Problem reading data.json file in submission directory. dataFile: ' + dataFile,
err
}); // we have an error, break out of all async iteration
msg: 'Problem reading submissions directory.',
err: err
});
}
}, function (err) {
// an error occurred...
if (err) {
return callback(err);

// if offset, we do pagination
if (offset != null) {
submissionDirs = submissionDirs.slice(offset, offset + limit);
}

// it was a success
return callback(null, aggregate);
return async.eachLimit(submissionDirs, ASYNC_LIMIT, (submissionDir, next) => {
// If it's not a directory, we just skip processing that path.
// TODO use fs.stat() and check for stats.isFile()
if (submissionDir[0] === '.' || submissionDir.indexOf('.txt') > 0) {
return next(); // ok, but skipping
}
// Otherwise, we want to open up the data.json in the submission dir.
const dataFile = path.join(dir, submissionDir, 'data.json');
try {
const parser = fs.createReadStream(dataFile).pipe(JSONStream.parse());

parser.on('data', data => {
const submission = Object.keys(data).reduce((obj, k) => {
obj[k] = data[k];

if (meta.fields[k] === 'select') {
const values = data[k].split(' ');

Object.keys(selectItems[k]).forEach(itemKey => {
obj[`${k}/${selectItems[k][itemKey]}`] = values.indexOf(selectItems[k][itemKey]) >= 0;
});
}

return obj;
}, {});

aggregate.push(submission);

return next(); // ok submission
});

parser.on('error', err => next(err));
} catch (err) {
// TODO pass an Error
return next({
status: 500,
msg: 'Problem reading data.json file in submission directory. dataFile: ' + dataFile,
err
}); // we have an error, break out of all async iteration
}
}, err => callback(err, aggregate));
});
});
};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"type-is": "^1.6.10",
"xform-to-json": "^1.2.1",
"xml2js": "^0.4.16",
"xml2js-xpath": "^0.8.0",
"xmlbuilder": "^8.2.2",
"xtend": "^4.0.1"
},
Expand Down
91 changes: 91 additions & 0 deletions util/xform.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
'use strict';

const fs = require('fs');
const path = require('path');

const async = require('async');
const { parseString } = require('xml2js');
const xpath = require('xml2js-xpath');

const settings = require('../settings');

const FORMS_DIR = path.join(settings.dataDir, 'forms/');

/**
 * Read an XForm definition from disk, parse it and extract its metadata.
 *
 * The name of the instance element is taken from the file name (without its
 * extension). Field types come from the `type` attribute of the `bind` whose
 * `nodeset` is `/<instanceName>/<field>`; fields without a matching bind are
 * left out of `fields`.
 *
 * @param {string} xformPath - Path of the XForm XML file to load.
 * @param {Function} callback - Node-style callback `(err, meta)`. On success
 *   `meta` carries `path`, `fields`, `filename`, `form` (the xml2js parse
 *   result), `id`, `instanceName` and `title`.
 */
const loadXForm = (xformPath, callback) => {
  return async.waterfall([
    async.apply(fs.readFile, xformPath),
    parseString
  ], (err, form) => {
    if (err) {
      return callback(err);
    }

    let meta;

    try {
      const instanceName = path.basename(xformPath, path.extname(xformPath));

      // '$' holds the element's attributes in the parsed form, not a field.
      const fieldNames = Object
        .keys(xpath.evalFirst(form, `//model/instance/${instanceName}`))
        .filter(x => x !== '$');

      const fields = fieldNames.reduce((obj, k) => {
        const node = xpath.evalFirst(form, `//model/bind[@nodeset='/${instanceName}/${k}']`);

        if (node != null) {
          obj[k] = node.$.type;
        }

        return obj;
      }, {});

      meta = {
        path: xformPath,
        fields,
        filename: path.basename(xformPath),
        form,
        id: xpath.evalFirst(form, '//model/instance/@id').$.id,
        instanceName,
        title: xpath.evalFirst(form, '//h:title')
      };
    } catch (parseErr) {
      return callback(new Error(`Failed while parsing ${xformPath}: ${parseErr.message}`));
    }

    // Deliver the result outside the try block: an exception thrown by the
    // consumer must not be reported as a parse failure, and the callback must
    // never be invoked a second time.
    return callback(null, meta);
  });
};

/**
 * Load every XForm (`*.xml`, matched case-insensitively) found in FORMS_DIR.
 *
 * Forms that fail to load are skipped rather than reported; only a failure to
 * list the directory is passed to the callback as an error.
 *
 * @param {Function} callback - Node-style callback `(err, forms)`.
 */
const getForms = (callback) => {
  // Load one form, turning any failure into a missing entry.
  /* eslint handle-callback-err: 0 */
  const loadOrSkip = (xformPath, done) =>
    loadXForm(xformPath, (err, form) =>
      // ignore errors
      done(null, form));

  // Drop the entries left empty by forms that could not be loaded.
  const dropMissing = (loaded, done) => done(null, loaded.filter(x => x != null));

  return fs.readdir(FORMS_DIR, (err, entries) => {
    if (err) {
      return callback(err);
    }

    const xformPaths = entries
      .filter(x => x.match(/\.xml$/i))
      .map(x => path.join(FORMS_DIR, x));

    return async.waterfall([
      async.apply(async.map, xformPaths, loadOrSkip),
      dropMissing
    ], callback);
  });
};

/**
 * Look up the metadata of the form whose `id` equals `formName`.
 *
 * @param {string} formName - Form id to search for among the loaded forms.
 * @param {Function} callback - Node-style callback `(err, meta)`. An error is
 *   passed when the forms cannot be listed or when no form has the given id.
 */
const getFormMetadata = (formName, callback) => {
  return getForms((err, forms) => {
    if (err) {
      return callback(err);
    }

    // When several definitions share an id, the last one listed wins.
    const meta = forms.filter(x => x.id === formName).pop();

    // Report a missing form explicitly so consumers never receive
    // `undefined` metadata and fail later while dereferencing it.
    if (meta == null) {
      return callback(new Error(`No form definition found for form: ${formName}`));
    }

    return callback(null, meta);
  });
};

// Public API of the XForm utilities.
module.exports = { getFormMetadata, getForms, loadXForm };
10 changes: 10 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,10 @@ lodash.uniq@^4.3.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"

[email protected]:
version "4.17.2"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.2.tgz#34a3055babe04ce42467b607d700072c7ff6bf42"

lodash@^3.0.1, lodash@^3.5.0:
version "3.10.1"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-3.10.1.tgz#5bf45e8e49ba4189e17d482789dfd15bd140b7b6"
Expand Down Expand Up @@ -2349,6 +2353,12 @@ xform-to-json@^1.2.1:
uuid "^2.0.1"
xml2js "^0.4.4"

xml2js-xpath@^0.8.0:
version "0.8.0"
resolved "https://registry.yarnpkg.com/xml2js-xpath/-/xml2js-xpath-0.8.0.tgz#4d3079a059de7b19fd1b4facdefab3757db45611"
dependencies:
lodash "4.17.2"

xml2js@^0.4.16, xml2js@^0.4.4:
version "0.4.17"
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.17.tgz#17be93eaae3f3b779359c795b419705a8817e868"
Expand Down

0 comments on commit db6f813

Please sign in to comment.