Skip to content
This repository has been archived by the owner on Jan 20, 2022. It is now read-only.

add python lib imposm.parser #2

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The tests involve decompressing a PBF extract of London stored on SSD and serial
- `node-osmium` https://github.com/osmcode/node-osmium
- `node-osmium-stream` https://github.com/geopipes/osmium-stream
- `go-osmpbf` https://github.com/qedus/osmpbf
- `py-imposm-parser` https://github.com/omniscale/imposm-parser

## results

Expand All @@ -40,21 +41,32 @@ Make sure you have the most current versions of the following installed:
- nodejs
- golang
- mercurial (for the golang dep)
- python

for impartial PBF stats I use:
- osmconvert (sudo apt-get install osmctools)

### dependencies

node/golang

```bash
go get github.com/qedus/osmpbf;
npm install;
```

python

```bash
sudo apt-get install build-essential python-dev python-pip protobuf-compiler libprotobuf-dev;
[sudo] pip install imposm.parser;
[sudo] pip install ujson;
```

### run test

```bash
bash run.sh
bash run.sh;
```

### drive performance
Expand Down
77 changes: 77 additions & 0 deletions imposm-parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

import sys
import ujson
from imposm.parser import OSMParser
from collections import OrderedDict # requires python 2.7+?

# Restore the default SIGPIPE behaviour so that a closed downstream pipe
# (e.g. piping the output into `head`) ends the process instead of raising
# "IOError: [Errno 32] Broken pipe". Background:
# http://newbebweb.blogspot.co.uk/2012/02/python-head-ioerror-errno-32-broken.html
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE,SIG_DFL)

class JsonOutput(object):
    """Parser callbacks that print OSM elements as line-delimited JSON.

    Every callback receives a batch of parsed elements and writes one JSON
    object per element to stdout, each followed by a newline. Records are
    plain dicts, so the key order in the emitted JSON is whatever the
    serializer produces for a dict (an OrderedDict was considered but is not
    used).
    """

    def coords(self, coords):
        """Write untagged nodes, given as (osmid, lon, lat) tuples."""
        for node_id, longitude, latitude in coords:
            record = {'type': 'node', 'id': node_id, 'lat': latitude, 'lon': longitude}
            sys.stdout.write(ujson.dumps(record) + '\n')

    def nodes(self, nodes):
        """Write tagged nodes, given as (osmid, tags, centroid) tuples.

        The centroid is indexed as [0] for longitude and [1] for latitude.
        """
        for node_id, node_tags, position in nodes:
            latitude = position[1]
            longitude = position[0]
            record = {
                'type': 'node', 'id': node_id,
                'lat': latitude, 'lon': longitude,
                'tags': node_tags,
            }
            sys.stdout.write(ujson.dumps(record) + '\n')

    def ways(self, ways):
        """Write ways, given as (osmid, tags, refs) tuples."""
        for way_id, way_tags, node_refs in ways:
            record = {'type': 'way', 'id': way_id, 'refs': node_refs, 'tags': way_tags}
            sys.stdout.write(ujson.dumps(record) + '\n')

    def relations(self, relations):
        """Ignore relations; nothing is written for them (yet)."""
        return None

# The PBF path is the only required argument. Without this check a missing
# argument surfaces as an IndexError traceback from sys.argv[1]; print a usage
# message instead and keep the non-zero exit status.
if len(sys.argv) < 2:
    sys.stderr.write('usage: python imposm-parser.py <file.osm.pbf>\n')
    sys.exit(1)

# Instantiate the JSON writer and the parser, wiring each element type to the
# matching JsonOutput callback, then parse the file given on the command line.
jsonify = JsonOutput()
p = OSMParser(
    # concurrency=4, # defaults to the number of CPU and cores of the host system
    coords_callback=jsonify.coords,
    nodes_callback=jsonify.nodes,
    ways_callback=jsonify.ways,
    relations_callback=jsonify.relations
)
p.parse(sys.argv[1])
35 changes: 19 additions & 16 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,33 @@ osmconvert --out-statistics $PBF_FILE; echo;

# Print summary figures for the parser output captured in ./tmpfile:
# the total line count, the number of node and way records, and a shasum
# of the file so that runs of different parsers can be compared.
stats(){
echo "total lines: `cat tmpfile | wc -l`";
echo "total nodes: `cat tmpfile | grep node | wc -l`";
echo "total ways: `cat tmpfile | grep refs | wc -l`";
echo "total nodes: `cat tmpfile | grep '\"node\"' | wc -l`";
echo "total ways: `cat tmpfile | grep '\"way\"' | wc -l`";
echo "shasum: (`shasum tmpfile`)";
}

echo '--- osm-pbf-parser ---';
time node osm-pbf-parser $PBF_FILE >tmpfile;
stats; rm tmpfile; echo;
# echo '--- osm-pbf-parser ---';
# time node osm-pbf-parser $PBF_FILE >tmpfile;
# stats; rm tmpfile; echo;

echo '--- osm-read ---';
time node osm-read $PBF_FILE >tmpfile;
cp tmpfile tmp1;
stats; rm tmpfile; echo;
# echo '--- osm-read ---';
# time node osm-read $PBF_FILE >tmpfile;
# cp tmpfile tmp1;
# stats; rm tmpfile; echo;

echo '--- node-osmium ---';
time node node-osmium $PBF_FILE >tmpfile;
cp tmpfile tmp2;
stats; rm tmpfile; echo;
# echo '--- node-osmium ---';
# time node node-osmium $PBF_FILE >tmpfile;
# cp tmpfile tmp2;
# stats; rm tmpfile; echo;

echo '--- node-osmium-stream ---';
time node node-osmium-stream $PBF_FILE >tmpfile;
stats; rm tmpfile; echo;
# echo '--- node-osmium-stream ---';
# time node node-osmium-stream $PBF_FILE >tmpfile;
# stats; rm tmpfile; echo;

echo '--- go-osmpbf ---';
time go run osmpbf.go $PBF_FILE >tmpfile;
stats; rm tmpfile; echo;

echo '--- py-imposm-parser ---';
time python imposm-parser.py $PBF_FILE >tmpfile;
stats; rm tmpfile; echo;