diff --git a/labelGenerator.js b/labelGenerator.js index 9c3dadf..d836647 100644 --- a/labelGenerator.js +++ b/labelGenerator.js @@ -9,14 +9,14 @@ function dedupeNameAndFirstLabelElement(labelParts) { // first, dedupe the name and 1st label array elements // this is used to ensure that the `name` and first admin hierarchy elements aren't repeated // eg - `["Lancaster", "Lancaster", "PA", "United States"]` -> `["Lancaster", "PA", "United States"]` - const deduped = _.uniq([labelParts.shift(), labelParts.shift()]); - // second, unshift the deduped parts back onto the labelParts - labelParts.unshift.apply(labelParts, deduped); - + // we take the first part because the layer should be the name and is required + if (labelParts[0].label === labelParts[1].label) { + const first = labelParts.shift(); + labelParts[0] = first; + } } return labelParts; - } function dedupeNameAndLastLabelElement(labelParts) { @@ -25,15 +25,14 @@ function dedupeNameAndLastLabelElement(labelParts) { // first, dedupe the name and second to last label array elements // this is used to ensure that the `name` and most granular admin hierarchy elements aren't repeated // eg - `["South Korea", "Seoul", "Seoul"]` -> `["South Korea", "Seoul"]` - const deduped = _.uniq([labelParts.pop(), labelParts.pop()]).reverse(); - - // second, unshift the deduped parts back onto the labelParts - labelParts.push.apply(labelParts, deduped); - + // we take the last part because the layer should be the name and is required + if (labelParts[labelParts.length - 1].label === labelParts[labelParts.length - 2].label) { + const last = labelParts.pop(); + labelParts[labelParts.length - 1] = last; + } } return labelParts; - } function getSchema(country_a) { @@ -42,7 +41,6 @@ function getSchema(country_a) { } return schemas.default; - } // this can go away once geonames is no longer supported @@ -69,7 +67,6 @@ function isUSAOrCAN(country_a) { function isGeonamesOrWhosOnFirst(source) { return 'geonames' === source || 
'whosonfirst' === source; - } function isInUSAOrCAN(record) { @@ -81,10 +78,8 @@ function isInKOR(record) { } // helper function that sets a default label for non-US/CA regions and countries -function buildPrefixLabelParts(schema, record) { - if (isRegion(record.layer) && - isGeonamesOrWhosOnFirst(record.source) && - isInUSAOrCAN(record)) { +function buildPrefixLabelParts(schema, record, role = 'required') { + if (isRegion(record.layer) && isGeonamesOrWhosOnFirst(record.source) && isInUSAOrCAN(record)) { return []; } @@ -96,13 +91,17 @@ function buildPrefixLabelParts(schema, record) { return []; } + const street = []; + if (record.layer === 'venue' && record.street) { + street.push({ label: record.street, role: 'optional', layer: 'street' }); + } + // support name aliases if (Array.isArray(record.name.default)) { - return record.name.default.slice(0,1); + return _.concat({ label: record.name.default.slice(0, 1), role, layer: 'name' }, street); } - return [record.name.default]; - + return _.concat({ label: record.name.default, role, layer: 'name' }, street); } function buildAdminLabelPart(schema, record) { @@ -111,13 +110,16 @@ function buildAdminLabelPart(schema, record) { // iterate the schema for (const field in schema.valueFunctions) { const valueFunction = schema.valueFunctions[field]; - labelParts.push(valueFunction(record)); + const result = valueFunction(record); + if (result && result.label) { + labelParts.push(_.assign({ layer: field }, result)); + } } return labelParts; } -function buildPostfixLabelParts(schema, record) { +function buildPostfixLabelParts(schema, record, role = 'required') { if (!isInKOR(record)) { return []; } @@ -126,27 +128,26 @@ function buildPostfixLabelParts(schema, record) { if (isAddress(record.layer)) { if (record.street) { - labelParts.push(record.street); + labelParts.push({ label: record.street, role, layer: 'name' }); + } else if (record.neighbourhood) { + labelParts.push({ label: record.neighbourhood, role, layer: 'name' }); 
} - else if (record.neighbourhood) { - labelParts.push(record.neighbourhood); - } - labelParts.push(record.housenumber); + labelParts.push({ label: record.housenumber, role, layer: 'housenumber' }); return labelParts; } // support name aliases if (Array.isArray(record.name.default)) { - return record.name.default.slice(0,1); + return [{ label: record.name.default[0], role, layer: 'name' }]; } - return [record.name.default]; + return [{ label: record.name.default, role, layer: 'name' }]; } -module.exports = function( record ){ +function generator (record) { const schema = getSchema(record.country_a); - const separator = _.get(schema, ['meta','separator'], ', '); + const separator = _.get(schema, ['meta', 'separator'], ', '); // in virtually all cases, this will be the `name` field const prefixParts = buildPrefixLabelParts(schema, record); @@ -161,11 +162,16 @@ module.exports = function( record ){ // third, dedupe and join with a comma and return if (isInKOR(record)) { labelParts = dedupeNameAndLastLabelElement(labelParts); - } - else { + } else { labelParts = dedupeNameAndFirstLabelElement(labelParts); } - return labelParts.join(separator); + return { parts: labelParts, separator }; +} +module.exports = function(record) { + const { parts, separator } = generator(record); + return parts.filter(p => p.role === 'required').map(p => p.label).join(separator); }; + +module.exports.partsGenerator = generator; \ No newline at end of file diff --git a/labelSchema.js b/labelSchema.js index 7a8fecc..d071a9a 100644 --- a/labelSchema.js +++ b/labelSchema.js @@ -13,13 +13,13 @@ function normalizeString(str){ const FRA_OVERSEAS = ['GF', 'GP', 'MQ', 'RE', 'YT']; // find the first field of record that has a non-empty value that's not already in labelParts -function getFirstProperty(fields) { +function getFirstProperty(fields, role = 'required') { return function(record) { for (var i = 0; i < fields.length; i++) { var fieldValue = record[fields[i]]; if (!_.isEmpty(fieldValue)) { - 
return fieldValue[0]; + return { label: fieldValue[0], role }; } } @@ -34,29 +34,29 @@ function getFirstProperty(fields) { // the full state/province name, eg: Pennsylvania, USA and Ontario, CA // 3. otherwise, the state/province abbreviation should be used, eg: Lancaster, PA, USA and Bruce, ON, CA // 4. if the abbreviation isn't available, use the full state/province name -function getRegionalValue(record) { +function getRegionalValue(record, role = 'required') { if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) { return; } if ('region' === record.layer && !_.isEmpty(record.region)) { // return full state name when state is the most granular piece of info - return record.region[0]; + return { label: record.region[0], role }; } else if (!_.isEmpty(record.region_a)) { // otherwise just return the region code when available - return record.region_a[0]; + return { label: record.region_a[0], role }; } else if (!_.isEmpty(record.region)) { // return the full name when there's no region code available - return record.region[0]; + return { label: record.region[0], role }; } } // The same as getRegionalValue above, but only returns a region if the region name // is distinct from the locality/localadmin/city name // This works best for large cities in countries where the region name/abbr is not _always_ included in the label -function getUniqueRegionalValue(record) { +function getUniqueRegionalValue(record, role = 'required') { if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) { return; } @@ -65,10 +65,10 @@ function getUniqueRegionalValue(record) { if ('region' === record.layer) { if (!_.isEmpty(record.region)) { // return full state name when state is the most granular piece of info - return record.region[0]; + return { label: record.region[0], role }; } } else { - const localityValue = getFirstProperty(['locality', 'localadmin'])(record); + const localityValue = _.get(getFirstProperty(['locality', 'localadmin'])(record), 'label'); 
if (record.region && normalizeString(localityValue) === normalizeString(record.region[0])) { // skip returning anything when the region and locality name are identical @@ -78,9 +78,9 @@ function getUniqueRegionalValue(record) { // prefer the region abbreviation, fall back to the region name if no abbreviation if (!_.isEmpty(record.region_a)) { - return record.region_a[0]; + return { label: record.region_a[0], role }; } else if (!_.isEmpty(record.region)) { - return record.region[0]; + return { label: record.region[0], role }; } } } @@ -91,20 +91,20 @@ function getUniqueRegionalValue(record) { // 3. use dependency abbreviation if applicable, eg - San Juan, PR // 4. use dependency name if no abbreviation, eg - San Juan, Puerto Rico // 5. use country abbreviation, eg - Lancaster, PA, USA -function getUSADependencyOrCountryValue(record) { +function getUSADependencyOrCountryValue(record, role = 'required') { if ('dependency' === record.layer && !_.isEmpty(record.dependency)) { - return record.dependency[0]; + return { label: record.dependency[0], role }; } else if ('country' === record.layer && !_.isEmpty(record.country)) { - return record.country[0]; + return { label: record.country[0], role }; } if (!_.isEmpty(record.dependency_a)) { - return record.dependency_a[0]; + return { label: record.dependency_a[0], role }; } else if (!_.isEmpty(record.dependency)) { - return record.dependency[0]; + return { label: record.dependency[0], role }; } - return record.country_a[0]; + return { label: record.country_a[0], role }; } // this function generates the last field of the labels for FRA records @@ -122,10 +122,25 @@ function getFRACountryValue() { }; } +// this function generates the region field for FRA records. +// 1. use nothing if the record is in the French overseas or Paris (VP), +// eg - Saint-Denis, Reunion (instead of Saint-Denis, Reunion, Reunion) +// 2. use region name, eg - Bagneux, Hauts-De-Seine, France +// 3. 
use this with caution, Paris is both a locality and region. This can cause labels like `Tour Eiffel, Paris, Paris, France` +function getFRARegionValue() { + const _default = getFirstProperty(['region'], 'optional'); + return (record) => { + if (!_.isEmpty(record.region_a) && (_.includes(FRA_OVERSEAS, record.region_a[0]) || record.region_a[0] === 'VP')) { + return undefined; + } + return _default(record); + }; +} + function isInNYC(record) { - const _region_a = getFirstProperty(['region_a'])(record); - const _country_a = getFirstProperty(['country_a'])(record); - const _locality_a = getFirstProperty(['locality_a'])(record); + const _region_a = _.get(getFirstProperty(['region_a'])(record), 'label'); + const _country_a = _.get(getFirstProperty(['country_a'])(record), 'label'); + const _locality_a = _.get(getFirstProperty(['locality_a'])(record), 'label'); return _country_a === 'USA' && _region_a === 'NY' && _locality_a === 'NYC'; } @@ -146,26 +161,23 @@ function getUSABoroughValue(record) { // - The borough is used for the locality in addresses // - Except in Queens, where ideally the neighbourhood is // - Also, 'New York' is the proper locality name for Manhattan -function getNYCLocalValue(record) { - const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record); - const _borough = getFirstProperty(['borough'])(record); - const _neighbourhood = getFirstProperty(['neighbourhood'])(record); +function getNYCLocalValue(record, role = 'required') { + const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record); + const _borough = getFirstProperty(['borough'], role)(record); + const _neighbourhood = getFirstProperty(['neighbourhood'], role)(record); // We still want to return "neighborhood, borough, region_a" when a user searches for a neighborhood // otherwise it looks incomplete, so skip to returning the borough in that case // Otherwise, in Queens only, use the neighborhood for the city in address labels if ('neighbourhood' !== 
record.layer && - _borough && - _borough.startsWith('Queens') && - _neighbourhood + _.get(_borough, 'label', '').startsWith('Queens') && + _.get(_neighbourhood, 'label') ) { return _neighbourhood; - } else if (_borough && - _borough.startsWith('Manhattan') - ) { + } else if (_.get(_borough, 'label', '').startsWith('Manhattan')) { // return 'Manhattan, New York, for Manhattan neighbourhoods if (record.layer === 'neighbourhood') { - return `${_borough}, ${_default}`; - // return only locality for Manhattan venues/addresses + return { label: `${_borough.label}, ${_default.label}`, role }; + // return only locality for Manhattan venues/addresses } else{ return _default; } @@ -174,8 +186,8 @@ function getNYCLocalValue(record) { } } -function getUSALocalValue(record) { - const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record); +function getUSALocalValue(record, role = 'required') { + const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record); // NYC is special for addresses if (isInNYC(record)) { @@ -234,7 +246,9 @@ module.exports = { }, 'FRA': { 'valueFunctions': { + 'borough': getFirstProperty(['borough'], 'optional'), 'local': getFirstProperty(['locality', 'localadmin']), + 'regional': getFRARegionValue(), 'country': getFRACountryValue() } }, diff --git a/test/labelSchema.js b/test/labelSchema.js index f46b84f..d8fadf9 100644 --- a/test/labelSchema.js +++ b/test/labelSchema.js @@ -30,7 +30,7 @@ module.exports.tests.supported_countries = function(test, common) { t.equals(Object.keys(schemas.GBR.valueFunctions).length, 3); t.equals(Object.keys(schemas.AUS.valueFunctions).length, 3); t.equals(Object.keys(schemas.KOR.valueFunctions).length, 3); - t.equals(Object.keys(schemas.FRA.valueFunctions).length, 2); + t.equals(Object.keys(schemas.FRA.valueFunctions).length, 4); t.equals(Object.keys(schemas.ITA.valueFunctions).length, 3); t.equals(Object.keys(schemas.KOR.meta).length, 1);