From db6f813eb324a6dda81ca314c7770b8e2f2cbd43 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 12 Apr 2017 18:46:19 -0700 Subject: [PATCH] Use knowledge of the form structure to better handle selects Introduces util/xform.js to centralize parsing of XForms. Fixes #96 --- api/odk/controllers/upload-form.js | 49 +------- api/odk/helpers/aggregate-submissions.js | 149 ++++++++++++++--------- package.json | 1 + util/xform.js | 91 ++++++++++++++ yarn.lock | 10 ++ 5 files changed, 194 insertions(+), 106 deletions(-) create mode 100644 util/xform.js diff --git a/api/odk/controllers/upload-form.js b/api/odk/controllers/upload-form.js index e99f3d10..82f99c63 100644 --- a/api/odk/controllers/upload-form.js +++ b/api/odk/controllers/upload-form.js @@ -9,9 +9,10 @@ const multiparty = require('multiparty'); const mv = require('mv'); const PythonShell = require('python-shell'); const tempy = require('tempy'); -const { parseString } = require('xml2js'); const settings = require('../../../settings'); +const { getForms, loadXForm } = require('../../../util/xform'); + const formsDir = settings.dataDir + '/forms/'; /** @@ -36,52 +37,6 @@ const xlsToXForm = (xlsPath, callback) => { }); }; -const loadXForm = (xformPath, callback) => { - return async.waterfall([ - async.apply(fs.readFile, xformPath), - parseString - ], (err, obj) => { - if (err) { - return callback(err); - } - - try { - const key = Object.keys(obj['h:html']['h:head'][0]['model'][0]['instance'][0])[0]; - - return callback(null, { - path: xformPath, - filename: path.basename(xformPath), - title: obj['h:html']['h:head'][0]['h:title'][0], - id: obj['h:html']['h:head'][0]['model'][0]['instance'][0][key][0]['$']['id'], - form: obj - }); - } catch (err) { - return callback(new Error(`Failed while parsing ${xformPath}: ${err.message}`)); - } - }); -}; - -const getForms = (callback) => { - return fs.readdir(formsDir, (err, forms) => { - if (err) { - return callback(err); - } - - return async.waterfall([ - async.apply(async.map, - forms - .filter(x => x.match(/\.xml$/i)) - .map(x => path.join(formsDir, x)), - /* eslint handle-callback-err: 0 */ - (filename, callback) => - loadXForm(filename, (err, form) => - // ignore errors - callback(null, form))), - (forms, callback) => callback(null, forms.filter(x => x != null)) - ], callback); - }); -}; - /** * User uploads an XLSForm (Excel ODK Form). * XLSForms are converted to XForm with pyxform, and both diff --git a/api/odk/helpers/aggregate-submissions.js b/api/odk/helpers/aggregate-submissions.js index c60d0760..ac0224ce 100644 --- a/api/odk/helpers/aggregate-submissions.js +++ b/api/odk/helpers/aggregate-submissions.js @@ -1,19 +1,21 @@ 'use strict'; -var fs = require('fs'); -var path = require('path'); +const fs = require('fs'); +const path = require('path'); -var JSONStream = require('JSONStream'); -var async = require('async'); +const async = require('async'); +const JSONStream = require('JSONStream'); +const xpath = require('xml2js-xpath'); -var settings = require('../../../settings'); +const settings = require('../../../settings'); +const { getFormMetadata } = require('../../../util/xform'); -var ASYNC_LIMIT = 10; +const ASYNC_LIMIT = 10; -module.exports = function (opts, callback) { - var formName = opts.formName; - var limit = parseInt(opts.limit); - var offset = opts.offset; +module.exports = (opts, callback) => { + const { formName } = opts; + let { offset } = opts; + let limit = parseInt(opts.limit); // default to 100 for limit if (isNaN(limit) || limit < 1) { @@ -46,69 +48,98 @@ module.exports = function (opts, callback) { }); } - var dir = settings.dataDir + '/submissions/' + formName; - var aggregate = []; - - // All of the submission dirs in the form directory - // Note that fs.readdir is always in alphabetical order on POSIX systems. - return fs.readdir(dir, function (err, submissionDirs) { + return getFormMetadata(formName, (err, meta) => { if (err) { - if (err.errno === -2) { - // trying to open a directory that is not there. - // TODO pass an Error - return callback({ - status: 404, - msg: 'You are trying to aggregate the ODK submissions for a form that has no submissions. Please submit at least one survey to see data. Also, check to see if you spelled the form name correctly. Form name: ' + formName, - err - }); - } - - // TODO pass an error return callback({ status: 500, - msg: 'Problem reading submissions directory.', - err: err + msg: 'Could not read form metadata.', + err }); } - // if offset, we do pagination - if (offset != null) { - submissionDirs = submissionDirs.slice(offset, offset + limit); - } + const selectFields = Object.keys(meta.fields).filter(k => meta.fields[k] === 'select'); + const selectItems = selectFields.reduce((obj, k) => { + obj[k] = xpath.find(meta.form, `//h:body/select[@ref='/${meta.instanceName}/${k}']/item`) + .reduce((obj2, item) => { + obj2[item.label[0]] = item.value[0]; - return async.eachLimit(submissionDirs, ASYNC_LIMIT, function (submissionDir, next) { - // If it's not a directory, we just skip processing that path. - if (submissionDir[0] === '.' || submissionDir.indexOf('.txt') > 0) { - return next(); // ok, but skipping - } - // Otherwise, we want to open up the data.json in the submission dir. - var dataFile = path.join(dir, submissionDir, 'data.json'); - try { - var parser = fs.createReadStream(dataFile).pipe(JSONStream.parse()); + return obj2; + }, {}); - parser.on('data', data => { - aggregate.push(data); + return obj; + }, {}); - return next(); // ok submission - }); + const dir = settings.dataDir + '/submissions/' + formName; + const aggregate = []; - parser.on('error', err => next(err)); - } catch (err) { - // TODO pass an Error - return next({ + // All of the submission dirs in the form directory + // Note that fs.readdir is always in alphabetical order on POSIX systems. + return fs.readdir(dir, (err, submissionDirs) => { + if (err) { + if (err.errno === -2) { + // trying to open a directory that is not there. + // TODO pass an Error + return callback({ + status: 404, + msg: 'You are trying to aggregate the ODK submissions for a form that has no submissions. Please submit at least one survey to see data. Also, check to see if you spelled the form name correctly. Form name: ' + formName, + err + }); + } + + // TODO pass an error + return callback({ status: 500, - msg: 'Problem reading data.json file in submission directory. dataFile: ' + dataFile, - err - }); // we have an error, break out of all async iteration + msg: 'Problem reading submissions directory.', + err: err + }); } - }, function (err) { - // an error occurred... - if (err) { - return callback(err); + + // if offset, we do pagination + if (offset != null) { + submissionDirs = submissionDirs.slice(offset, offset + limit); } - // it was a success - return callback(null, aggregate); + return async.eachLimit(submissionDirs, ASYNC_LIMIT, (submissionDir, next) => { + // If it's not a directory, we just skip processing that path. + // TODO use fs.stat() and check for stats.isFile() + if (submissionDir[0] === '.' || submissionDir.indexOf('.txt') > 0) { + return next(); // ok, but skipping + } + // Otherwise, we want to open up the data.json in the submission dir. + const dataFile = path.join(dir, submissionDir, 'data.json'); + try { + const parser = fs.createReadStream(dataFile).pipe(JSONStream.parse()); + + parser.on('data', data => { + const submission = Object.keys(data).reduce((obj, k) => { + obj[k] = data[k]; + + if (meta.fields[k] === 'select') { + const values = data[k].split(' '); + + Object.keys(selectItems[k]).forEach(itemKey => { + obj[`${k}/${selectItems[k][itemKey]}`] = values.indexOf(selectItems[k][itemKey]) >= 0; + }); + } + + return obj; + }, {}); + + aggregate.push(submission); + + return next(); // ok submission + }); + + parser.on('error', err => next(err)); + } catch (err) { + // TODO pass an Error + return next({ + status: 500, + msg: 'Problem reading data.json file in submission directory. dataFile: ' + dataFile, + err + }); // we have an error, break out of all async iteration + } + }, err => callback(err, aggregate)); }); }); }; diff --git a/package.json b/package.json index cf6c6bcc..5e8a3d05 100644 --- a/package.json +++ b/package.json @@ -57,6 +57,7 @@ "type-is": "^1.6.10", "xform-to-json": "^1.2.1", "xml2js": "^0.4.16", + "xml2js-xpath": "^0.8.0", "xmlbuilder": "^8.2.2", "xtend": "^4.0.1" }, diff --git a/util/xform.js b/util/xform.js new file mode 100644 index 00000000..07464199 --- /dev/null +++ b/util/xform.js @@ -0,0 +1,91 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +const async = require('async'); +const { parseString } = require('xml2js'); +const xpath = require('xml2js-xpath'); + +const settings = require('../settings'); + +const FORMS_DIR = path.join(settings.dataDir, 'forms/'); + +const loadXForm = (xformPath, callback) => { + return async.waterfall([ + async.apply(fs.readFile, xformPath), + parseString + ], (err, form) => { + if (err) { + return callback(err); + } + + try { + const instanceName = path.basename(xformPath, path.extname(xformPath)); + const fieldNames = Object + .keys(xpath.evalFirst(form, `//model/instance/${instanceName}`)) + .filter(x => x !== '$'); + + const fields = fieldNames.reduce((obj, k) => { + const node = xpath.evalFirst(form, `//model/bind[@nodeset='/${instanceName}/${k}']`); + + if (node != null) { + obj[k] = node.$.type; + } + + return obj; + }, {}); + + return callback(null, { + path: xformPath, + fields, + filename: path.basename(xformPath), + form, + id: xpath.evalFirst(form, '//model/instance/@id').$.id, + instanceName, + title: xpath.evalFirst(form, '//h:title') + }); + } catch (err) { + return callback(new Error(`Failed while parsing ${xformPath}: ${err.message}`)); + } + }); +}; + +const getForms = (callback) => { + return fs.readdir(FORMS_DIR, (err, forms) => { + if (err) { + return callback(err); + } + + return async.waterfall([ + async.apply(async.map, + forms + .filter(x => x.match(/\.xml$/i)) + .map(x => path.join(FORMS_DIR, x)), + /* eslint handle-callback-err: 0 */ + (filename, callback) => + loadXForm(filename, (err, form) => + // ignore errors + callback(null, form))), + (forms, callback) => callback(null, forms.filter(x => x != null)) + ], callback); + }); +}; + +const getFormMetadata = (formName, callback) => { + return getForms((err, forms) => { + if (err) { + return callback(err); + } + + const meta = forms.filter(x => x.id === formName).pop(); + + return callback(null, meta); + }); +}; + +module.exports = { + getFormMetadata, + getForms, + loadXForm +}; diff --git a/yarn.lock b/yarn.lock index c9bddf09..1c145f51 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1350,6 +1350,10 @@ lodash.uniq@^4.3.0: version "4.5.0" resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773" +lodash@4.17.2: + version "4.17.2" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.2.tgz#34a3055babe04ce42467b607d700072c7ff6bf42" + lodash@^3.0.1, lodash@^3.5.0: version "3.10.1" resolved "https://registry.yarnpkg.com/lodash/-/lodash-3.10.1.tgz#5bf45e8e49ba4189e17d482789dfd15bd140b7b6" @@ -2349,6 +2353,12 @@ xform-to-json@^1.2.1: uuid "^2.0.1" xml2js "^0.4.4" +xml2js-xpath@^0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/xml2js-xpath/-/xml2js-xpath-0.8.0.tgz#4d3079a059de7b19fd1b4facdefab3757db45611" + dependencies: + lodash "4.17.2" + xml2js@^0.4.16, xml2js@^0.4.4: version "0.4.17" resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.17.tgz#17be93eaae3f3b779359c795b419705a8817e868"