# Reconstructed from a whitespace-collapsed git patch:
#   commit  b013ebc4af42c9deb7ba6b21309f48355a489633
#   author  m-kro, Mon, 16 Dec 2024 13:33:59 +0100 (Signed-off-by: m-kro)
#   subject GTFS stop id hash as direction_id substitute, ref #15736
#   target  tools/import/gtfs/gtfs2osm.py
#
# Besides the two functions below, the patch
#   - adds "# @author Mirko Barthauer" to the file header,
#   - adds "import hashlib" to the imports,
#   - changes import_gtfs() so that a missing 'direction_id' column is no longer
#     filled with '' but computed via
#         trips = discover_direction(routes, trips, stop_times)
#
# NOTE(review): the 'benchmark' decorator is not defined in the visible excerpt;
# it is expected to be provided by the surrounding gtfs2osm.py module.

import hashlib

import pandas as pd


def md5hash(s):
    """
    Returns the hexadecimal MD5 digest of the UTF-8 encoded string s.
    The digest is only used as a compact identifier, not for anything
    security related.
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest()


@benchmark
def discover_direction(routes, trips, stop_times):
    """
    Sets the direction value if it is not present in the GTFS data to identify separate
    directions of the same PT line.

    The substitute direction_id of a trip is the MD5 hash of its space separated
    stop ids, taken in the order of stop_sequence. Trips serving the same stops
    in the same order therefore share one direction_id.

    routes: the routes DataFrame (not needed for the computation, kept so that
            existing callers do not have to change)
    trips: the trips DataFrame, needs the column 'trip_id'
    stop_times: the stop_times DataFrame, needs the columns 'trip_id' and 'stop_id';
                'stop_sequence' is used for ordering when available

    Returns a copy of trips with the additional column 'direction_id'. Trips without
    any stop time get the empty string, which is the value import_gtfs used as
    fallback before. If trips already has a 'direction_id' column it is returned
    unchanged.
    """
    if 'direction_id' in trips:
        # nothing to discover; merging again would only create _x / _y columns
        return trips

    # only the two columns needed for the stop pattern are copied, stop_times is
    # usually by far the largest GTFS table
    stopPattern = stop_times[['trip_id', 'stop_id']].copy()
    # str.join below fails on non-string ids (e.g. if they were parsed as numbers)
    stopPattern['stop_id'] = stopPattern['stop_id'].astype(str)

    if 'stop_sequence' in stop_times:
        # GTFS does not require the rows of stop_times.txt to be sorted, the order
        # of the stops is defined by stop_sequence. Compare numerically ('10' > '9')
        # and use a stable sort so that ties keep their file order.
        stopPattern['_order'] = pd.to_numeric(stop_times['stop_sequence'], errors='coerce').values
        stopPattern = stopPattern.sort_values('_order', kind='mergesort')

    # groupby keeps the row order within each group, so the joined string
    # reflects the stop order established above
    stopLists = stopPattern.groupby('trip_id')['stop_id'].agg(' '.join)
    directions = stopLists.apply(md5hash).rename('direction_id').reset_index()

    # copy the direction_id back to the trips file / join the DataFrame
    result = pd.merge(trips, directions, on='trip_id', how='left')
    # trips without stop times would otherwise carry NaN, which groupby drops silently
    result['direction_id'] = result['direction_id'].fillna('')
    return result