Skip to content

Commit

Permalink
GTFS stop id hash as direction_id substitute ref #15736
Browse files Browse the repository at this point in the history
Signed-off-by: m-kro <[email protected]>
  • Loading branch information
m-kro committed Dec 16, 2024
1 parent 55b7800 commit b013ebc
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion tools/import/gtfs/gtfs2osm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

# @file gtfs2osm.py
# @author Giuliana Armellini
# @author Mirko Barthauer
# @date 2021-02-18

"""
Expand All @@ -27,6 +28,8 @@
import io
import re
from collections import defaultdict
import hashlib

# from pprint import pprint

import pandas as pd
Expand Down Expand Up @@ -105,6 +108,10 @@
# }


def md5hash(s):
    """
    Returns the hexadecimal MD5 digest of the given text.

    The string s is encoded as UTF-8 before hashing; the result is the
    32 character lowercase hex representation of the digest.
    """
    digest = hashlib.md5()
    digest.update(s.encode('utf-8'))
    return digest.hexdigest()


@benchmark
def import_gtfs(options, gtfsZip):
"""
Expand All @@ -124,7 +131,7 @@ def import_gtfs(options, gtfsZip):
if 'trip_headsign' not in trips:
trips['trip_headsign'] = ''
if 'direction_id' not in trips:
trips['direction_id'] = ''
trips = discover_direction(routes, trips, stop_times)
if 'route_short_name' not in routes:
routes['route_short_name'] = routes['route_long_name']

Expand Down Expand Up @@ -199,6 +206,20 @@ def import_gtfs(options, gtfsZip):
return routes, trips_on_day, shapes, stops, stop_times


@benchmark
def discover_direction(routes, trips, stop_times):
    """
    Sets the direction value if it is not present in the GTFS data to identify separate
    directions of the same PT line.

    The substitute direction_id of a trip is the MD5 hash of its stop ids joined by
    blanks in travel order, so trips serving the same stops in the same order share
    one value while the opposite direction gets a different one.

    @param routes: routes DataFrame (not needed for the computation, kept for
                   interface compatibility)
    @param trips: trips DataFrame with a 'trip_id' column and without 'direction_id'
    @param stop_times: stop times DataFrame with 'trip_id' and 'stop_id' columns and
                       optionally 'stop_sequence'
    @return: a copy of trips with an added 'direction_id' column (NaN for trips
             which have no stop times)
    """
    # only the trip / stop relation is needed; joining trips and routes would just
    # inflate the table and could duplicate stop rows on repeated ids
    stopSequences = stop_times[['trip_id', 'stop_id']]
    if 'stop_sequence' in stop_times:
        # GTFS does not guarantee that the rows of a trip are stored in travel order,
        # so order them explicitly (numerically, the column may have been read as text)
        order = pd.to_numeric(stop_times['stop_sequence'], errors='coerce')
        # stable sort on a single key keeps the file order for ties and invalid values
        stopSequences = stopSequences.assign(stop_sequence=order).sort_values('stop_sequence', kind='mergesort')
    # make sure the ids can be joined even if they were parsed as numbers
    stopSequences = stopSequences.assign(stop_id=stopSequences['stop_id'].astype(str))

    # create a direction_id identifier from the stop sequence
    # (groupby keeps the row order within each trip)
    joinedStops = stopSequences.groupby('trip_id', sort=False)['stop_id'].agg(' '.join)
    directions = joinedStops.apply(md5hash).rename('direction_id').reset_index()

    # copy the direction_id back to the trips file / join the DataFrame
    return pd.merge(trips, directions[['trip_id', 'direction_id']], on='trip_id', how='left')


@benchmark
def filter_gtfs(options, routes, trips_on_day, shapes, stops, stop_times):
"""
Expand Down

0 comments on commit b013ebc

Please sign in to comment.