From db4cd06576a783bd2daef567141c9f4b3a2bf70c Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Mon, 29 Oct 2018 13:03:07 -0400 Subject: [PATCH] feat(log): Add full clean context to parser logs Without the unmodified text input, it's hard to tell which cases are causing these messages. --- query/autocomplete.js | 2 +- query/search_original.js | 2 +- query/text_parser_addressit.js | 44 +++++++++++++++++----------------- sanitizer/_text_addressit.js | 8 +++---- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index f16adc44c..0f084d33d 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -127,7 +127,7 @@ function generateQuery( clean ){ // run the address parser if( clean.parsed_text ){ - textParser( clean.parsed_text, vs ); + textParser( clean, vs ); } return { diff --git a/query/search_original.js b/query/search_original.js index 2bcaeb700..09f671e06 100644 --- a/query/search_original.js +++ b/query/search_original.js @@ -130,7 +130,7 @@ function generateQuery( clean ){ // run the address parser if( clean.parsed_text ){ - textParser( clean.parsed_text, vs ); + textParser( clean, vs ); } return { diff --git a/query/text_parser_addressit.js b/query/text_parser_addressit.js index 72a82831a..15a77aa1b 100644 --- a/query/text_parser_addressit.js +++ b/query/text_parser_addressit.js @@ -15,58 +15,58 @@ var adminFields = placeTypes.concat([ **/ // all the address parsing logic -function addParsedVariablesToQueryVariables( parsed_text, vs ){ +function addParsedVariablesToQueryVariables( clean, vs ){ // is it a street address? 
- var isStreetAddress = parsed_text.hasOwnProperty('number') && parsed_text.hasOwnProperty('street'); + var isStreetAddress = clean.parsed_text.hasOwnProperty('number') && clean.parsed_text.hasOwnProperty('street'); if( isStreetAddress ){ - vs.var( 'input:name', parsed_text.number + ' ' + parsed_text.street ); + vs.var( 'input:name', clean.parsed_text.number + ' ' + clean.parsed_text.street ); } // ? - else if( parsed_text.admin_parts ) { - vs.var( 'input:name', parsed_text.name ); + else if( clean.parsed_text.admin_parts ) { + vs.var( 'input:name', clean.parsed_text.name ); } // ? else { logger.warn( 'chaos monkey asks: what happens now?', { - parsed_text: parsed_text + params: clean }); } // ==== add parsed matches [address components] ==== // house number - if( parsed_text.hasOwnProperty('number') ){ - vs.var( 'input:housenumber', parsed_text.number ); + if( clean.parsed_text.hasOwnProperty('number') ){ + vs.var( 'input:housenumber', clean.parsed_text.number ); } // street name - if( parsed_text.hasOwnProperty('street') ){ - vs.var( 'input:street', parsed_text.street ); + if( clean.parsed_text.hasOwnProperty('street') ){ + vs.var( 'input:street', clean.parsed_text.street ); } // postal code - if( parsed_text.hasOwnProperty('postalcode') ){ - vs.var( 'input:postcode', parsed_text.postalcode ); + if( clean.parsed_text.hasOwnProperty('postalcode') ){ + vs.var( 'input:postcode', clean.parsed_text.postalcode ); } // ==== add parsed matches [admin components] ==== // city - if( parsed_text.hasOwnProperty('city') ){ - vs.var( 'input:county', parsed_text.city ); + if( clean.parsed_text.hasOwnProperty('city') ){ + vs.var( 'input:county', clean.parsed_text.city ); } // state - if( parsed_text.hasOwnProperty('state') ){ - vs.var( 'input:region_a', parsed_text.state ); + if( clean.parsed_text.hasOwnProperty('state') ){ + vs.var( 'input:region_a', clean.parsed_text.state ); } // country - if( parsed_text.hasOwnProperty('country') ){ - vs.var( 'input:country_a', 
parsed_text.country ); + if( clean.parsed_text.hasOwnProperty('country') ){ + vs.var( 'input:country_a', clean.parsed_text.country ); } // ==== deal with the 'leftover' components ==== @@ -74,11 +74,11 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ // a concept called 'leftovers' which is just 'admin_parts' /or 'regions'. var leftoversString = ''; - if( parsed_text.hasOwnProperty('admin_parts') ){ - leftoversString = parsed_text.admin_parts; + if( clean.parsed_text.hasOwnProperty('admin_parts') ){ + leftoversString = clean.parsed_text.admin_parts; } - else if( parsed_text.hasOwnProperty('regions') ){ - leftoversString = parsed_text.regions.join(' '); + else if( clean.parsed_text.hasOwnProperty('regions') ){ + leftoversString = clean.parsed_text.regions.join(' '); } // if we have 'leftovers' then assign them to any fields which diff --git a/sanitizer/_text_addressit.js b/sanitizer/_text_addressit.js index 70a4bd24a..1918308eb 100644 --- a/sanitizer/_text_addressit.js +++ b/sanitizer/_text_addressit.js @@ -29,7 +29,7 @@ function _sanitize( raw, clean ){ delete clean.parsed_text; // parse text with query parser - var parsed_text = parse(clean.text); + var parsed_text = parse(clean.text, clean); if (check.assigned(parsed_text)) { clean.parsed_text = parsed_text; } @@ -51,7 +51,7 @@ module.exports = () => ({ // this is the addressit functionality from https://github.com/pelias/text-analyzer/blob/master/src/addressItParser.js var DELIM = ','; -function parse(query) { +function parse(query, clean) { var getAdminPartsBySplittingOnDelim = function(queryParts) { // naive approach - for admin matching during query time // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny' @@ -110,12 +110,10 @@ function parse(query) { // if all we found was regions, ignore it as it is not enough information to make smarter decisions if (Object.keys(parsed_text).length === 1 && !_.isUndefined(parsed_text.regions)) { logger.info('Ignoring address parser output, 
regions only', { - text: query.text, - parsed: parsed_text + params: clean }); return null; } return parsed_text; - }