diff --git a/.gitignore b/.gitignore index 5c963d2..ab6a0db 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ *.pyc __pycache__/ ptenv/ -.cache/ \ No newline at end of file +.cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b73bdfc..c87d646 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,8 @@ # Docs @ http://pre-commit.com/ repos: -- repo: git://github.com/pre-commit/pre-commit-hooks - sha: v0.7.1 # Use the ref you want to point at +- repo: https://github.com/kuanb/pre-commit-hooks.git + sha: 61d0735ca8b0ce5ab8df867bc974798ba8316ee5 hooks: - id: autopep8-wrapper - id: check-added-large-files @@ -24,7 +24,7 @@ repos: - id: flake8 - id: trailing-whitespace -- repo: https://github.com/CalthorpeAnalytics/pre-commit-python-sorter.git +- repo: https://github.com/kuanb/pre-commit-python-sorter.git sha: 1.0.8 hooks: - - id: python-import-sorter \ No newline at end of file + - id: python-import-sorter diff --git a/.travis.yml b/.travis.yml index 8ad01d4..ae637b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,4 +21,4 @@ install: - pip install -r requirements.txt script: - - PYTHONPATH=. py.test \ No newline at end of file + - PYTHONPATH=. py.test diff --git a/Makefile b/Makefile index 2f50075..786f39d 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,2 @@ test: - PYTHONPATH=. py.test \ No newline at end of file + PYTHONPATH=. py.test diff --git a/README.rst b/README.rst index 097ee4f..a9f6f3a 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,7 @@ Usage # use to summarize impedance start = 7*60*60 # 7:00 AM end = 10*60*60 # 10:00 AM - + # Converts feed subset into a directed # network multigraph G = pt.load_feed_as_graph(feed, start, end) @@ -42,4 +42,3 @@ Note: Still a WIP. Current version on PyPI not good... .. code:: console pip install peartree - diff --git a/peartree/__version__.py b/peartree/__version__.py index 34da6b8..10939f0 100644 --- a/peartree/__version__.py +++ b/peartree/__version__.py @@ -1 +1 @@ -__version__ = '0.1.2' \ No newline at end of file +__version__ = '0.1.2' diff --git a/peartree/graph.py b/peartree/graph.py index e158b0f..8bf8e9c 100644 --- a/peartree/graph.py +++ b/peartree/graph.py @@ -1,20 +1,14 @@ from typing import Dict -import networkx as nx -import numpy as np -import pandas as pd -import random - -from fiona import crs import networkx as nx import pandas as pd import partridge as ptg +from fiona import crs from .settings import WGS84 from .summarizer import (generate_edge_and_wait_values, generate_summary_edge_costs, generate_summary_wait_times) -from .utilities import log def generate_empty_md_graph(name: str, @@ -35,10 +29,10 @@ def generate_summary_graph_elements(feed: ptg.gtfs.feed, all_wait_times) = generate_edge_and_wait_values(feed, target_time_start, target_time_end) - + summary_edge_costs = generate_summary_edge_costs(all_edge_costs) wait_times_by_stop = generate_summary_wait_times(all_wait_times) - + return (summary_edge_costs, wait_times_by_stop) @@ -53,7 +47,7 @@ def populate_graph(G: nx.MultiDiGraph, # TODO: Join tables before hand to make # this part go faster - id_mask = (feed.stops.stop_id==sid) + id_mask = (feed.stops.stop_id == sid) stop_data = feed.stops[id_mask].head(1).T.squeeze() G.add_node(full_sid, diff --git a/peartree/paths.py b/peartree/paths.py index 7b464a1..3a612ba 100644 --- a/peartree/paths.py +++ b/peartree/paths.py @@ -1,11 +1,10 @@ -import string import random +import string import networkx as nx import partridge as ptg -from .graph import (generate_empty_md_graph, - generate_summary_graph_elements, +from .graph import (generate_empty_md_graph, generate_summary_graph_elements, populate_graph) from .utilities import log @@ -26,14 +25,15 @@ def _generate_random_name(N: int=5): def get_representative_feed(file_loc: str, day_type: str='busiest'): - + service_ids_by_date = ptg.read_service_ids_by_date(file_loc) trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc) # Make sure we have some valid values returned in trips if not len(trip_counts_by_date.items()): # Otherwise, error out - raise InvalidGTFS('No valid trip counts by date were identified in GTFS.') + raise InvalidGTFS('No valid trip counts by date ' + 'were identified in GTFS.') # At this point, different methods can be implemented to help select how # to pick which date/schedule id to use @@ -43,14 +43,14 @@ def get_representative_feed(file_loc: str, else: raise NotImplementedError('Unsupported day type string supplied.') - log('Selected_date: {}'.format(selected_date)) - log('Number of trips on that date: {}'.format(trip_count)) + log(f'Selected_date: {selected_date}') + log(f'Number of trips on that date: {trip_count}') all_service_ids = '\n\t'.join(service_ids_by_date[selected_date]) - log('\nAll related service IDs: \n\t{}'.format(all_service_ids)) + log(f'\nAll related service IDs: \n\t{all_service_ids}') - selected_service_ids = service_ids_by_date[selected_date] - feed_query = {'trips.txt': {'service_id': selected_service_ids}} + sub = service_ids_by_date[selected_date] + feed_query = {'trips.txt': {'service_id': sub}} return ptg.feed(file_loc, view=feed_query) @@ -68,10 +68,13 @@ def load_feed_as_graph(feed: ptg.gtfs.feed, raise InvalidTimeBracket('Invalid start or end target times provided.') if end_time < start_time: - raise InvalidTimeBracket('Invalid ordering: Start time is greater than end time.') + raise InvalidTimeBracket('Invalid ordering: Start time ' + 'is greater than end time.') (summary_edge_costs, - wait_times_by_stop) = generate_summary_graph_elements(feed, start_time, end_time) + wait_times_by_stop) = generate_summary_graph_elements(feed, + start_time, + end_time) # This is a flag used to check if we need to run any additional steps # after the feed is returned to ensure that new nodes and edge can connect @@ -80,11 +83,15 @@ def load_feed_as_graph(feed: ptg.gtfs.feed, # G is either a new MultiDiGraph or one pass from before if existing_graph_supplied: - # TODO: If passed from before we should run some checks to ensure it is valid - # as well as set a flag to create join points with other feeds so that - # they can be linked when the next is added. + # TODO: If passed from before we should run some checks to ensure + # it is valid as well as set a flag to create join points with + # other feeds so that they can be linked when the next is added. G = existing_graph else: G = generate_empty_md_graph(name) - - return populate_graph(G, name, feed, wait_times_by_stop, summary_edge_costs) + + return populate_graph(G, + name, + feed, + wait_times_by_stop, + summary_edge_costs) diff --git a/peartree/plot.py b/peartree/plot.py index 243f7ed..8ab71db 100644 --- a/peartree/plot.py +++ b/peartree/plot.py @@ -1,6 +1,11 @@ +import matplotlib import networkx as nx import osmnx as ox +# Force matplotlib to not use any Xwindows backend. +matplotlib.use('Agg') + + def generate_plot(G: nx.MultiDiGraph): # TODO: Build out custom plotting configurations but, # in the meantime, use OSMnx's plotting configurations @@ -16,4 +21,4 @@ def generate_plot(G: nx.MultiDiGraph): edge_color='#e2dede', edge_alpha=0.25, bgcolor='black') - return (fig, ax) \ No newline at end of file + return (fig, ax) diff --git a/peartree/settings.py b/peartree/settings.py index 2b226b6..2d697a1 100644 --- a/peartree/settings.py +++ b/peartree/settings.py @@ -6,7 +6,7 @@ log_console = False log_level = lg.INFO -# Set some globals here that +# Set some globals here that # are used as reference throughout lib WGS84 = 4326 # degree measurement -WEB_MERCATOR = 3857 # meter-based \ No newline at end of file +WEB_MERCATOR = 3857 # meter-based diff --git a/peartree/summarizer.py b/peartree/summarizer.py index f10806a..3ad39a3 100644 --- a/peartree/summarizer.py +++ b/peartree/summarizer.py @@ -1,7 +1,5 @@ -import networkx as nx import numpy as np import pandas as pd -import random from .utilities import log @@ -20,9 +18,10 @@ def generate_wait_times(trips_and_stop_times: pd.DataFrame): # Handle both inbound and outbound directions for direction in [0, 1]: - constraint_1 = (trips_and_stop_times.direction_id==direction) - constraint_2 = (trips_and_stop_times.stop_id==stop_id) - direction_subset = trips_and_stop_times[constraint_1 & constraint_2] + constraint_1 = (trips_and_stop_times.direction_id == direction) + constraint_2 = (trips_and_stop_times.stop_id == stop_id) + direction_subset = trips_and_stop_times[ + constraint_1 & constraint_2] # Only run if each direction is contained # in the same trip id @@ -33,19 +32,19 @@ def generate_wait_times(trips_and_stop_times: pd.DataFrame): # Add according to which direction we are working with wait_times[direction].append(average_wait) - + return wait_times def generate_all_observed_edge_costs(trips_and_stop_times): all_edge_costs = None for trip_id in trips_and_stop_times.trip_id.unique(): - tst_mask = (trips_and_stop_times.trip_id==trip_id) + tst_mask = (trips_and_stop_times.trip_id == trip_id) tst_sub = trips_and_stop_times[tst_mask] # Just in case both directions are under the same trip id for direction in [0, 1]: - tst_sub_dir = tst_sub[tst_sub.direction_id==direction] + tst_sub_dir = tst_sub[tst_sub.direction_id == direction] tst_sub_dir = tst_sub_dir.sort_values('stop_sequence') deps = tst_sub_dir.departure_time[:-1] @@ -70,7 +69,7 @@ def summarize_edge_costs(df): from_stop_id = df.from_stop_id.values[0] results_mtx = [] for to_stop_id in df.to_stop_id.unique(): - to_mask = (df.to_stop_id==to_stop_id) + to_mask = (df.to_stop_id == to_stop_id) avg_cost = df[to_mask].edge_cost.mean() results_mtx.append([avg_cost, from_stop_id, @@ -89,8 +88,8 @@ def summarize_waits_at_one_stop(stop_df): divide_by = len(stop_df) * 2 dir_0_sum = stop_df.wait_dir_0.sum() dir_1_sum = stop_df.wait_dir_1.sum() - calculated = ((dir_0_sum + dir_1_sum)/divide_by) - + calculated = ((dir_0_sum + dir_1_sum) / divide_by) + return calculated @@ -99,35 +98,37 @@ def generate_summary_wait_times(df): 'wait_dir_0', 'wait_dir_1']].reset_index(drop=True) init_of_stop_ids = df_sub.stop_id.unique() - + # TODO: Use NaN upstream so we don't have this sort of # hacky typing (floats support NaNs) and None conditioning - + # First convert all None values to NaN so we can handle them # in vector format - df_sub.loc[df_sub.wait_dir_0==None, 'wait_dir_0'] = np.nan - df_sub.loc[df_sub.wait_dir_1==None, 'wait_dir_1'] = np.nan - + dir0_mask = df_sub.wait_dir_0.isnull() + dir1_mask = df_sub.wait_dir_1.isnull() + df_sub.loc[dir0_mask, 'wait_dir_0'] = np.nan + df_sub.loc[dir1_mask, 'wait_dir_1'] = np.nan + # Convert anything that is 0 or less seconds to a NaN as well # as there should not be negative or 0 second waits in the system df_sub.loc[~(df_sub.wait_dir_0 > 0), 'wait_dir_0'] = np.nan df_sub.loc[~(df_sub.wait_dir_1 > 0), 'wait_dir_1'] = np.nan - + # Convert to type float (which support float) df_sub.wait_dir_0 = df_sub.wait_dir_0.astype(float) df_sub.wait_dir_1 = df_sub.wait_dir_1.astype(float) - + # Clean out the None values dir_0_mask = ~np.isnan(df_sub.wait_dir_0) dir_1_mask = ~np.isnan(df_sub.wait_dir_1) - + # We can't include values where both directions # have NaNs at same time d0_ids = df_sub[dir_0_mask].stop_id.unique() d1_ids = df_sub[dir_1_mask].stop_id.unique() keep_ids = list(d0_ids) + list(d1_ids) df_sub_clean = df_sub[df_sub.stop_id.isin(keep_ids)] - + orig_len = len(df_sub) new_len = len(df_sub_clean) if not new_len == orig_len: @@ -135,7 +136,7 @@ def generate_summary_wait_times(df): 'stop IDs. From {} to {}.'.format(orig_len, new_len))) # And now replace df_sub df_sub = df_sub_clean - + # Recheck all for NaNs dir_0_mask_2 = np.isnan(df_sub.wait_dir_0) dir_1_mask_2 = np.isnan(df_sub.wait_dir_1) @@ -159,24 +160,24 @@ def generate_summary_wait_times(df): if (len(dir_0_check_2) > 0) or (len(dir_1_check_2) > 0): raise Exception('NaN values for both directions on some stop IDs.') - + grouped = df_sub.groupby('stop_id') summarized = grouped.apply(summarize_waits_at_one_stop) - + summed_reset = summarized.reset_index(drop=False) summed_reset.columns = ['stop_id', 'avg_cost'] - + end_of_stop_ids = summed_reset.stop_id.unique() log('Original stop id count: {}'.format(len(init_of_stop_ids))) log('After cleaning stop id count: {}'.format(len(end_of_stop_ids))) - + if len(init_of_stop_ids) > len(end_of_stop_ids): a = set(list(init_of_stop_ids)) b = set(list(end_of_stop_ids)) unresolved_ids = list(a - b) log('Some unaccounted for stop ' 'ids. Resolving {}...'.format(len(unresolved_ids))) - + # TODO: Perhaps these are start/end stops and should adopt # a cost that is "average" for that route? # We should think of how to actually do this @@ -188,22 +189,22 @@ def generate_summary_wait_times(df): for i in unresolved_ids: sids.append(i) acst.append(30 * 60) # 30 minutes, converted to seconds - + # Rebuild the dataframe summed_reset = pd.DataFrame({'stop_id': sids, 'avg_cost': acst}) - + return summed_reset def generate_edge_and_wait_values(feed, - target_time_start, - target_time_end): + target_time_start, + target_time_end): all_edge_costs = None all_wait_times = None for i, route in feed.routes.iterrows(): log('Processing on route {}.'.format(route.route_id)) # Now get all the trips for that route - trips = feed.trips[feed.trips.route_id==route.route_id] + trips = feed.trips[feed.trips.route_id == route.route_id] # Get just the stop times related to this trip st_trip_id_mask = feed.stop_times.trip_id.isin(trips.trip_id) @@ -218,7 +219,7 @@ def generate_edge_and_wait_values(feed, # TODO: Make these logger.info statements a = len(stimes_init.trip_id.unique()) b = len(stimes.trip_id.unique()) - log('\tReduced trips in consideration from {} to {}.'.format(a,b)) + log('\tReduced trips in consideration from {} to {}.'.format(a, b)) trips_and_stop_times = pd.merge(trips, stimes, @@ -233,7 +234,8 @@ def generate_edge_and_wait_values(feed, sort_values_list = ['stop_sequence', 'arrival_time', 'departure_time'] - trips_and_stop_times = trips_and_stop_times.sort_values(sort_values_list) + trips_and_stop_times = trips_and_stop_times.sort_values( + sort_values_list) trips_and_stop_times = pd.merge(trips, stimes, how='inner', @@ -247,7 +249,8 @@ def generate_edge_and_wait_values(feed, sort_values_list = ['stop_sequence', 'arrival_time', 'departure_time'] - trips_and_stop_times = trips_and_stop_times.sort_values(sort_values_list) + trips_and_stop_times = trips_and_stop_times.sort_values( + sort_values_list) wait_times = generate_wait_times(trips_and_stop_times) trips_and_stop_times['wait_dir_0'] = wait_times[0] @@ -260,7 +263,7 @@ def generate_edge_and_wait_values(feed, if all_wait_times is None: all_wait_times = tst_sub else: - all_wait_times = all_wait_times.append(tst_sub) + all_wait_times = all_wait_times.append(tst_sub) # Get all edge costs for this route and add to the running total edge_costs = generate_all_observed_edge_costs(trips_and_stop_times) @@ -270,5 +273,5 @@ def generate_edge_and_wait_values(feed, all_edge_costs = edge_costs else: all_edge_costs = all_edge_costs.append(edge_costs) - + return (all_edge_costs, all_wait_times) diff --git a/peartree/utilities.py b/peartree/utilities.py index 622795b..4eeeb60 100644 --- a/peartree/utilities.py +++ b/peartree/utilities.py @@ -1,7 +1,8 @@ import datetime as dt -import os - import logging as lg +import os +import sys +import unicodedata from . import settings @@ -26,7 +27,8 @@ def get_logger(level=None, # get today's date and construct a log filename todays_date = dt.datetime.today().strftime('%Y_%m_%d') - log_filename = '{}/{}_{}.log'.format(settings.logs_folder, filename, todays_date) + log_filename = '{}/{}_{}.log'.format( + settings.logs_folder, filename, todays_date) # if the logs folder does not already exist, create it if not os.path.exists(settings.logs_folder): @@ -34,7 +36,8 @@ def get_logger(level=None, # create file handler and log formatter and set them up handler = lg.FileHandler(log_filename, encoding='utf-8') - formatter = lg.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s') + formatter = lg.Formatter( + '%(asctime)s %(levelname)s %(name)s %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) logger.setLevel(level) @@ -46,7 +49,7 @@ def get_logger(level=None, def config(log_console=settings.log_console): # Taken from OSMnx's utils.py file, see log comments # for link to version from which these methods were taken - + # Set each global variable to the passed-in parameter value settings.log_console = log_console @@ -55,6 +58,16 @@ def config(log_console=settings.log_console): log('Configured osmnx') +def make_str(value): + # This method should I ever want to support Python 2.x + try: + # For python 2.x compatibility, use unicode + return unicode(value) + except NameError: + # Python 3.x has no unicode type, so if error, use str type + return str(value) + + def log(message: str, level=None, name=None, filename=None): # Same function, taken from OSMnx's log utility # Link: https://github.com/gboeing/osmnx/blob/ @@ -92,6 +105,9 @@ def log(message: str, level=None, name=None, filename=None): # Convert message to ascii for console display so it doesn't break # windows terminals - message = unicodedata.normalize('NFKD', make_str(message)).encode('ascii', errors='replace').decode() - print(message) + str_msg = make_str(message) + normalized = unicodedata.normalize('NFKD', str_msg) + encoded = normalized.encode('ascii', errors='replace') + decoded = encoded.decode() + print(decoded) sys.stdout = standard_out diff --git a/pre-commit-reqs.txt b/pre-commit-reqs.txt index 2fb7168..790e54f 100644 --- a/pre-commit-reqs.txt +++ b/pre-commit-reqs.txt @@ -1 +1 @@ -pre-commit==0.15.2 \ No newline at end of file +pre-commit==0.15.2 diff --git a/requirements.txt b/requirements.txt index 63184ea..f086808 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ fiona==1.6.1 networkx>=2.0 osmnx==0.6 -partridge==0.3.0 \ No newline at end of file +partridge==0.3.0 diff --git a/requirements_dev.txt b/requirements_dev.txt index a8a83fa..f0f1fa8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,4 +3,4 @@ wheel flake8 coverage pytest -pytest-runner \ No newline at end of file +pytest-runner diff --git a/setup.cfg b/setup.cfg index ceb0362..8e299c7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,4 +5,4 @@ universal = 1 exclude = docs [aliases] -test = pytest \ No newline at end of file +test = pytest diff --git a/setup.py b/setup.py index 0796673..6248f1b 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,8 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """The setup script.""" -from setuptools import setup, find_packages +from setuptools import find_packages, setup with open('README.rst') as readme_file: readme = readme_file.read() @@ -31,7 +30,8 @@ setup( name='peartree', version=about['__version__'], - description='Peartree is a library for converting GTFS to directed graphs.', + description=('Peartree is a library for ' + 'converting GTFS to directed graphs.'), long_description=readme, author='Kuan Butts', author_email='kuanbutts@gmail.com', diff --git a/tests/test_paths.py b/tests/test_paths.py index c39ac66..8bee07f 100644 --- a/tests/test_paths.py +++ b/tests/test_paths.py @@ -1,13 +1,10 @@ import os -import pytest import networkx as nx import partridge as ptg - -from peartree.paths import (_generate_random_name, - get_representative_feed, - load_feed_as_graph, - InvalidGTFS) +import pytest +from peartree.paths import (InvalidGTFS, _generate_random_name, + get_representative_feed, load_feed_as_graph) def fixture(filename): @@ -43,9 +40,9 @@ def test_feed_to_graph_path(): path = fixture('caltrain-2017-07-24.zip') feed = get_representative_feed(path) - start = 7*60*60 - end = 10*60*60 - + start = 7 * 60 * 60 + end = 10 * 60 * 60 + G = load_feed_as_graph(feed, start, end, diff --git a/tests/test_plot.py b/tests/test_plot.py index 2e25e3d..896066e 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -1,11 +1,6 @@ import os -import pytest -import networkx as nx -import partridge as ptg - -from peartree.paths import (get_representative_feed, - load_feed_as_graph,) +from peartree.paths import get_representative_feed, load_feed_as_graph from peartree.plot import generate_plot @@ -17,9 +12,9 @@ def test_feed_to_graph_path(): path = fixture('caltrain-2017-07-24.zip') feed = get_representative_feed(path) - start = 7*60*60 - end = 10*60*60 - + start = 7 * 60 * 60 + end = 10 * 60 * 60 + G = load_feed_as_graph(feed, start, end) fig, ax = generate_plot(G) diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 103b575..ea14c07 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -1,5 +1,3 @@ -import pytest - from peartree.utilities import log