Skip to content

Commit

Permalink
feat(venue_popularity): foundations for popularity scoring module
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Jun 13, 2019
1 parent f96c861 commit 508a1b3
Show file tree
Hide file tree
Showing 7 changed files with 63,405 additions and 31,493 deletions.
2 changes: 2 additions & 0 deletions stream/importPipeline.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ streams.adminLookup = require('pelias-wof-admin-lookup').create;
streams.addressExtractor = require('./address_extractor');
streams.categoryMapper = require('./category_mapper');
streams.addendumMapper = require('./addendum_mapper');
streams.popularityMapper = require('./popularity_mapper');
streams.dbMapper = require('pelias-model').createDocumentMapperStream;
streams.elasticsearch = require('pelias-dbclient');

Expand All @@ -26,6 +27,7 @@ streams.import = function(){
.pipe( streams.blacklistStream() )
.pipe( streams.categoryMapper( categoryDefaults ) )
.pipe( streams.addendumMapper() )
.pipe( streams.popularityMapper() )
.pipe( streams.adminLookup() )
.pipe( streams.dbMapper() )
.pipe( streams.elasticsearch({name: 'openstreetmap'}) );
Expand Down
156 changes: 156 additions & 0 deletions stream/popularity_mapper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/**
The popularity mapper is responsible for generating a 'popularity'
value by inspecting OSM tags.
Feel free to make changes to this mapping file!
**/

const through = require('through2');
const peliasLogger = require('pelias-logger').get('openstreetmap');

// Wraps a numeric weight in the `{ _score: n }` shape used by every entry below.
const weight = ( points ) => ({ _score: points });

/**
  Scoring table, keyed by OSM tag name.

  - a `_score` directly on a tag entry is added whenever that tag is
    present on the record, regardless of its value.
  - every other key of a tag entry is compared against the tag's value;
    its `_score` is added when the value matches.

  NOTE(review): several nested keys (eg. building.colour, building.material,
  tourism.visitors, contact.website) look like OSM sub-keys such as
  'building:colour' rather than tag values; as written they only match
  records tagged eg. building=colour -- confirm this is intended.
**/
const config = {
  // https://taginfo.openstreetmap.org/keys/importance
  importance: {
    international: weight( 20000 ),
    national: weight( 5000 )
  },
  // https://taginfo.openstreetmap.org/keys/wikipedia
  wikipedia: weight( 2000 ),
  // https://taginfo.openstreetmap.org/keys/wikidata
  wikidata: weight( 2000 ),

  // assorted properties seen on major landmarks and public buildings
  architect: weight( 5000 ),
  heritage: weight( 5000 ),
  historic: weight( 5000 ),
  building: {
    colour: weight( 2000 ),
    material: weight( 2000 ),
    supermarket: weight( 2000 ),
    civic: weight( 2000 ),
    government: weight( 2000 ),
    hospital: weight( 2000 ),
    train_station: weight( 5000 ),
    transportation: weight( 5000 ),
    university: weight( 2000 ),
    public: weight( 2000 ),
    sports_hall: weight( 2000 )
  },
  height: weight( 2000 ),
  start_date: weight( 2000 ),

  // assorted properties seen on tourist attractions
  tourism: {
    visitors: weight( 2000 ),
    aquarium: weight( 2000 ),
    attraction: weight( 1000 ),
    museum: weight( 2000 ),
    theme_park: weight( 2000 ),
    zoo: weight( 2000 ),
    // applied to any record carrying a tourism tag
    _score: 1000
  },
  amenity: {
    college: weight( 2000 ),
    library: weight( 2000 ),
    school: weight( 1000 ),
    university: weight( 2000 ),
    bank: weight( 1000 ),
    clinic: weight( 1000 ),
    dentist: weight( 1000 ),
    doctors: weight( 1000 ),
    hospital: weight( 2000 ),
    nursing_home: weight( 1000 ),
    pharmacy: weight( 1000 ),
    social_facility: weight( 1000 ),
    veterinary: weight( 1000 ),
    community_centre: weight( 1000 ),
    music_venue: weight( 1000 ),
    nightclub: weight( 1000 ),
    planetarium: weight( 1000 ),
    social_centre: weight( 1000 ),
    theatre: weight( 1000 ),
    courthouse: weight( 1000 ),
    coworking_space: weight( 1000 ),
    dojo: weight( 1000 ),
    embassy: weight( 1000 ),
    fire_station: weight( 1000 ),
    marketplace: weight( 1000 ),
    place_of_worship: weight( 1000 ),
    police: weight( 1000 ),
    post_office: weight( 1000 ),
    prison: weight( 1000 ),
    public_bath: weight( 1000 ),
    townhall: weight( 1000 )
  },
  museum: weight( 2000 ),
  museum_type: weight( 2000 ),
  opening_hours: weight( 1000 ),
  operator: weight( 1000 ),
  fee: weight( 2000 ),
  website: weight( 2000 ),
  contact: {
    website: weight( 2000 ),
    email: weight( 1000 ),
    phone: weight( 1000 ),
    fax: weight( 1000 ),
    foursquare: weight( 2000 ),
    facebook: weight( 2000 ),
    linkedin: weight( 2000 ),
    instagram: weight( 2000 ),
    skype: weight( 2000 ),
    flickr: weight( 2000 ),
    youtube: weight( 2000 )
  }
};

module.exports = function(){

return through.obj(( doc, enc, next ) => {

try {

// skip records with no tags
let tags = doc.getMeta('tags');
if( !tags ){
return next( null, doc );
}

// default popularity
let popularity = doc.getPopularity() || 0;

// apply scores from config
for( let tag in config ){
if( tags.hasOwnProperty( tag ) ){
// global score for the tag
if( config[tag]._score ){
popularity += config[tag]._score;
}
// individual scores for specific values
for( let value in config[tag] ){
if( value === '_score' ){ continue; }
if( !config[tag][value]._score ){ continue; }
if( tags[tag] === value.trim() ){
popularity += config[tag][value]._score;
}
}
}
}

// set document popularity when it is greater than zero
if( !!popularity ){
doc.setPopularity( popularity );
}
}

catch( e ){
peliasLogger.error( 'popularity_mapper error' );
peliasLogger.error( e.stack );
peliasLogger.error( JSON.stringify( doc, null, 2 ) );
}

return next( null, doc );

});

};
1 change: 1 addition & 0 deletions test/end-to-end.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ streams.pbfParser()
.pipe( streams.addressExtractor() )
.pipe( streams.categoryMapper( streams.config.categoryDefaults ) )
.pipe( streams.addendumMapper() )
.pipe( streams.popularityMapper() )
.pipe( model.createDocumentMapperStream() )
.pipe( sink.obj(function (doc) {
results.push(doc);
Expand Down
Loading

0 comments on commit 508a1b3

Please sign in to comment.