Skip to content

Commit

Permalink
feat(partsGenerator): this new function will delay the part generation
Browse files Browse the repository at this point in the history
This will help for label deduplication in the API
  • Loading branch information
Joxit committed Oct 8, 2021
1 parent 382276b commit 5936adf
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 69 deletions.
74 changes: 40 additions & 34 deletions labelGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ function dedupeNameAndFirstLabelElement(labelParts) {
// first, dedupe the name and 1st label array elements
// this is used to ensure that the `name` and first admin hierarchy elements aren't repeated
// eg - `["Lancaster", "Lancaster", "PA", "United States"]` -> `["Lancaster", "PA", "United States"]`
const deduped = _.uniq([labelParts.shift(), labelParts.shift()]);
// second, unshift the deduped parts back onto the labelParts
labelParts.unshift.apply(labelParts, deduped);

// we take the first part because the layer should be the name and is required
if (labelParts[0].label === labelParts[1].label) {
const first = labelParts.shift();
labelParts[0] = first;
}
}

return labelParts;

}

function dedupeNameAndLastLabelElement(labelParts) {
Expand All @@ -25,15 +25,14 @@ function dedupeNameAndLastLabelElement(labelParts) {
// first, dedupe the name and second to last label array elements
// this is used to ensure that the `name` and most granular admin hierarchy elements aren't repeated
// eg - `["South Korea", "Seoul", "Seoul"]` -> `["South Korea", "Seoul"]`
const deduped = _.uniq([labelParts.pop(), labelParts.pop()]).reverse();

// second, push the deduped parts back onto the end of labelParts
labelParts.push.apply(labelParts, deduped);

// we take the last part because the layer should be the name and is required
if (labelParts[labelParts.length - 1].label === labelParts[labelParts.length - 2].label) {
const last = labelParts.pop();
labelParts[labelParts.length - 1] = last;
}
}

return labelParts;

}

function getSchema(country_a) {
Expand All @@ -42,7 +41,6 @@ function getSchema(country_a) {
}

return schemas.default;

}

// this can go away once geonames is no longer supported
Expand All @@ -69,7 +67,6 @@ function isUSAOrCAN(country_a) {

// true only when `source` is exactly 'geonames' or 'whosonfirst'
// (strict comparison, so any other value - including undefined - gives false)
function isGeonamesOrWhosOnFirst(source) {
  switch (source) {
    case 'geonames':
    case 'whosonfirst':
      return true;
    default:
      return false;
  }
}

function isInUSAOrCAN(record) {
Expand All @@ -81,10 +78,8 @@ function isInKOR(record) {
}

// helper function that sets a default label for non-US/CA regions and countries
function buildPrefixLabelParts(schema, record) {
if (isRegion(record.layer) &&
isGeonamesOrWhosOnFirst(record.source) &&
isInUSAOrCAN(record)) {
function buildPrefixLabelParts(schema, record, role = 'required') {
if (isRegion(record.layer) && isGeonamesOrWhosOnFirst(record.source) && isInUSAOrCAN(record)) {
return [];
}

Expand All @@ -96,13 +91,17 @@ function buildPrefixLabelParts(schema, record) {
return [];
}

const street = [];
if (record.layer === 'venue' && record.street) {
street.push({ label: record.street, role: 'optional', layer: 'street' });
}

// support name aliases
if (Array.isArray(record.name.default)) {
return record.name.default.slice(0,1);
return _.concat({ label: record.name.default.slice(0, 1), role, layer: 'name' }, street);
}

return [record.name.default];

return _.concat({ label: record.name.default, role, layer: 'name' }, street);
}

function buildAdminLabelPart(schema, record) {
Expand All @@ -111,13 +110,16 @@ function buildAdminLabelPart(schema, record) {
// iterate the schema
for (const field in schema.valueFunctions) {
const valueFunction = schema.valueFunctions[field];
labelParts.push(valueFunction(record));
const result = valueFunction(record);
if (result && result.label) {
labelParts.push(_.assign({ layer: field }, result));
}
}

return labelParts;
}

function buildPostfixLabelParts(schema, record) {
function buildPostfixLabelParts(schema, record, role = 'required') {
if (!isInKOR(record)) {
return [];
}
Expand All @@ -126,27 +128,26 @@ function buildPostfixLabelParts(schema, record) {

if (isAddress(record.layer)) {
if (record.street) {
labelParts.push(record.street);
labelParts.push({ label: record.street, role, layer: 'name' });
} else if (record.neighbourhood) {
labelParts.push({ label: record.neighbourhood, role, layer: 'name' });
}
else if (record.neighbourhood) {
labelParts.push(record.neighbourhood);
}
labelParts.push(record.housenumber);
labelParts.push({ label: record.housenumber, role, layer: 'housenumber' });

return labelParts;
}

// support name aliases
if (Array.isArray(record.name.default)) {
return record.name.default.slice(0,1);
return [{ label: record.name.default[0], role, layer: 'name' }];
}

return [record.name.default];
return [{ label: record.name.default, role, layer: 'name' }];
}

module.exports = function( record ){
function generator (record) {
const schema = getSchema(record.country_a);
const separator = _.get(schema, ['meta','separator'], ', ');
const separator = _.get(schema, ['meta', 'separator'], ', ');

// in virtually all cases, this will be the `name` field
const prefixParts = buildPrefixLabelParts(schema, record);
Expand All @@ -161,11 +162,16 @@ module.exports = function( record ){
// third, dedupe and join with a comma and return
if (isInKOR(record)) {
labelParts = dedupeNameAndLastLabelElement(labelParts);
}
else {
} else {
labelParts = dedupeNameAndFirstLabelElement(labelParts);
}

return labelParts.join(separator);
return { parts: labelParts, separator };
}

module.exports = function(record) {
const { parts, separator } = generator(record);
return parts.filter(p => p.role === 'required').map(p => p.label).join(separator);
};

module.exports.partsGenerator = generator;
82 changes: 48 additions & 34 deletions labelSchema.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ function normalizeString(str){
const FRA_OVERSEAS = ['GF', 'GP', 'MQ', 'RE', 'YT'];

// find the first field of record that has a non-empty value that's not already in labelParts
function getFirstProperty(fields) {
function getFirstProperty(fields, role = 'required') {
return function(record) {
for (var i = 0; i < fields.length; i++) {
var fieldValue = record[fields[i]];

if (!_.isEmpty(fieldValue)) {
return fieldValue[0];
return { label: fieldValue[0], role };
}

}
Expand All @@ -34,29 +34,29 @@ function getFirstProperty(fields) {
// the full state/province name, eg: Pennsylvania, USA and Ontario, CA
// 3. otherwise, the state/province abbreviation should be used, eg: Lancaster, PA, USA and Bruce, ON, CA
// 4. if the abbreviation isn't available, use the full state/province name
// Picks the region part of a label for `record`:
// - nothing (undefined) when the record has a non-empty `dependency` or `dependency_a`
// - the first `region` value when the record's layer is 'region'
// - otherwise the first `region_a` value, falling back to the first `region` value
// - nothing (undefined) when none of those fields is populated
// NOTE(review): this listing appears to be a rendered commit diff with removed and
// added lines interleaved (two signatures, paired `return`s). The `role` parameter and
// the `{ label, role }` returns are presumably the post-commit version - confirm
// against the repository before relying on either form.
function getRegionalValue(record) {
function getRegionalValue(record, role = 'required') {
if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) {
return;
}

if ('region' === record.layer && !_.isEmpty(record.region)) {
// return full state name when state is the most granular piece of info
return record.region[0];
return { label: record.region[0], role };

} else if (!_.isEmpty(record.region_a)) {
// otherwise just return the region code when available
return record.region_a[0];
return { label: record.region_a[0], role };

} else if (!_.isEmpty(record.region)) {
// return the full name when there's no region code available
return record.region[0];
return { label: record.region[0], role };
}
}

// The same as getRegionalValue above, but only returns a region if the region name
// is distinct from the locality/localadmin/city name
// This works best for large cities in countries where the region name/abbr is not _always_ included in the label
function getUniqueRegionalValue(record) {
function getUniqueRegionalValue(record, role = 'required') {
if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) {
return;
}
Expand All @@ -65,10 +65,10 @@ function getUniqueRegionalValue(record) {
if ('region' === record.layer) {
if (!_.isEmpty(record.region)) {
// return full state name when state is the most granular piece of info
return record.region[0];
return { label: record.region[0], role };
}
} else {
const localityValue = getFirstProperty(['locality', 'localadmin'])(record);
const localityValue = _.get(getFirstProperty(['locality', 'localadmin'])(record), 'label');

if (record.region && normalizeString(localityValue) === normalizeString(record.region[0])) {
// skip returning anything when the region and locality name are identical
Expand All @@ -78,9 +78,9 @@ function getUniqueRegionalValue(record) {

// prefer the region abbreviation, fall back to the region name if no abbreviation
if (!_.isEmpty(record.region_a)) {
return record.region_a[0];
return { label: record.region_a[0], role };
} else if (!_.isEmpty(record.region)) {
return record.region[0];
return { label: record.region[0], role };
}
}
}
Expand All @@ -91,20 +91,20 @@ function getUniqueRegionalValue(record) {
// 3. use dependency abbreviation if applicable, eg - San Juan, PR
// 4. use dependency name if no abbreviation, eg - San Juan, Puerto Rico
// 5. use country abbreviation, eg - Lancaster, PA, USA
// Picks the last label part for a USA record, checked in this order:
// - first `dependency` value when the record's layer is 'dependency'
// - first `country` value when the record's layer is 'country'
// - first `dependency_a` value, else first `dependency` value, when present
// - first `country_a` value otherwise (read without an emptiness check)
// NOTE(review): this listing appears to be a rendered commit diff with removed and
// added lines interleaved (two signatures, paired `return`s). The `role` parameter and
// the `{ label, role }` returns are presumably the post-commit version - confirm
// against the repository before relying on either form.
function getUSADependencyOrCountryValue(record) {
function getUSADependencyOrCountryValue(record, role = 'required') {
if ('dependency' === record.layer && !_.isEmpty(record.dependency)) {
return record.dependency[0];
return { label: record.dependency[0], role };
} else if ('country' === record.layer && !_.isEmpty(record.country)) {
return record.country[0];
return { label: record.country[0], role };
}

if (!_.isEmpty(record.dependency_a)) {
return record.dependency_a[0];
return { label: record.dependency_a[0], role };
} else if (!_.isEmpty(record.dependency)) {
return record.dependency[0];
return { label: record.dependency[0], role };
}

return record.country_a[0];
return { label: record.country_a[0], role };
}

// this function generates the last field of the labels for FRA records
Expand All @@ -122,10 +122,25 @@ function getFRACountryValue() {
};
}

// builds the value function that produces the region part of FRA labels.
// 1. returns nothing when the first `region_a` value is one of FRA_OVERSEAS or 'VP' (Paris),
//    eg - Saint-Denis, Reunion (instead of Saint-Denis, Reunion, Reunion)
// 2. otherwise returns the first `region` value as an 'optional' part,
//    eg - Bagneux, Hauts-De-Seine, France
// 3. use with caution: Paris is both a locality and a region, which can produce
//    labels like `Tour Eiffel, Paris, Paris, France`
function getFRARegionValue() {
  const regionPart = getFirstProperty(['region'], 'optional');

  return (record) => {
    // no region code at all: nothing to exclude, use the region name
    if (_.isEmpty(record.region_a)) {
      return regionPart(record);
    }

    const regionCode = record.region_a[0];
    const isExcluded = regionCode === 'VP' || _.includes(FRA_OVERSEAS, regionCode);

    return isExcluded ? undefined : regionPart(record);
  };
}

// Reports whether `record` is in New York City: the first `country_a` value must be
// 'USA', the first `region_a` value 'NY' and the first `locality_a` value 'NYC'.
// NOTE(review): this listing appears to be a rendered commit diff, so each local is
// declared twice. The `_.get(..., 'label')` lines are presumably the post-commit
// version, unwrapping the `{ label, role }` object that getFirstProperty returns
// after the change - confirm against the repository.
function isInNYC(record) {
const _region_a = getFirstProperty(['region_a'])(record);
const _country_a = getFirstProperty(['country_a'])(record);
const _locality_a = getFirstProperty(['locality_a'])(record);
const _region_a = _.get(getFirstProperty(['region_a'])(record), 'label');
const _country_a = _.get(getFirstProperty(['country_a'])(record), 'label');
const _locality_a = _.get(getFirstProperty(['locality_a'])(record), 'label');

return _country_a === 'USA' && _region_a === 'NY' && _locality_a === 'NYC';
}
Expand All @@ -146,26 +161,23 @@ function getUSABoroughValue(record) {
// - The borough is used for the locality in addresses
// - Except in Queens, where ideally the neighbourhood is
// - Also, 'New York' is the proper locality name for Manhattan
function getNYCLocalValue(record) {
const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record);
const _borough = getFirstProperty(['borough'])(record);
const _neighbourhood = getFirstProperty(['neighbourhood'])(record);
function getNYCLocalValue(record, role = 'required') {
const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record);
const _borough = getFirstProperty(['borough'], role)(record);
const _neighbourhood = getFirstProperty(['neighbourhood'], role)(record);
// We still want to return "neighborhood, borough, region_a" when a user searches for a neighborhood
// otherwise it looks incomplete, so skip to returning the borough in that case
// Otherwise, in Queens only, use the neighborhood for the city in address labels
if ('neighbourhood' !== record.layer &&
_borough &&
_borough.startsWith('Queens') &&
_neighbourhood
_.get(_borough, 'label', '').startsWith('Queens') &&
_.get(_neighbourhood, 'label')
) {
return _neighbourhood;
} else if (_borough &&
_borough.startsWith('Manhattan')
) {
} else if (_.get(_borough, 'label', '').startsWith('Manhattan')) {
// return 'Manhattan, New York' for Manhattan neighbourhoods
if (record.layer === 'neighbourhood') {
return `${_borough}, ${_default}`;
// return only locality for Manhattan venues/addresses
return { label: `${_borough.label}, ${_default.label}`, role };
// return only locality for Manhattan venues/addresses
} else{
return _default;
}
Expand All @@ -174,8 +186,8 @@ function getNYCLocalValue(record) {
}
}

function getUSALocalValue(record) {
const _default = getFirstProperty(['locality', 'localadmin', 'county'])(record);
function getUSALocalValue(record, role = 'required') {
const _default = getFirstProperty(['locality', 'localadmin', 'county'], role)(record);

// NYC is special for addresses
if (isInNYC(record)) {
Expand Down Expand Up @@ -234,7 +246,9 @@ module.exports = {
},
'FRA': {
'valueFunctions': {
'borough': getFirstProperty(['borough'], 'optional'),
'local': getFirstProperty(['locality', 'localadmin']),
'regional': getFRARegionValue(),
'country': getFRACountryValue()
}
},
Expand Down
2 changes: 1 addition & 1 deletion test/labelSchema.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ module.exports.tests.supported_countries = function(test, common) {
t.equals(Object.keys(schemas.GBR.valueFunctions).length, 3);
t.equals(Object.keys(schemas.AUS.valueFunctions).length, 3);
t.equals(Object.keys(schemas.KOR.valueFunctions).length, 3);
t.equals(Object.keys(schemas.FRA.valueFunctions).length, 2);
t.equals(Object.keys(schemas.FRA.valueFunctions).length, 4);
t.equals(Object.keys(schemas.ITA.valueFunctions).length, 3);

t.equals(Object.keys(schemas.KOR.meta).length, 1);
Expand Down

0 comments on commit 5936adf

Please sign in to comment.