From 69c7e8c55260c1c4c7eebdd697c227caa82d74ba Mon Sep 17 00:00:00 2001
From: Kuan Butts <kuanbutts@gmail.com>
Date: Thu, 20 Apr 2017 14:33:47 -0700
Subject: [PATCH 1/4] reset index and drop it if the index name already exists
 as a column of the dataframe

---
 urbanaccess/gtfs/network.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py
index a056b59..71e49de 100644
--- a/urbanaccess/gtfs/network.py
+++ b/urbanaccess/gtfs/network.py
@@ -290,8 +290,14 @@ def interpolatestoptimes(stop_times_df, calendar_selected_trips_df, day):
     df_for_interpolation['stop_sequence_merge'] = (
         df_for_interpolation[~trailing]['stop_sequence'])
 
+    # Need to check if existing index in column names and drop if so (else
+    # a ValueError where Pandas can't insert b/c col already exists will occur)
+    drop_bool = False
+    if _check_if_index_name_in_cols(df_for_interpolation):
+        drop_bool = True
+    df_for_interpolation.reset_index(inplace=True, drop=drop_bool)
+
     # Merge back into original index
-    df_for_interpolation.reset_index(inplace=True)
     interpolated_df = pd.merge(df_for_interpolation, melted, 'left',
                                on=['stop_sequence_merge', 'unique_trip_id'])
     interpolated_df.set_index('index', inplace=True)
@@ -763,4 +769,10 @@ def load_processed_gtfs_data(dir=config.settings.data_folder,filename=None):
             if 'calendar_dates' in store.keys():
                 gtfsfeeds_df.calendar_dates = hdf5_to_df(dir=dir,filename=filename,key='calendar_dates')
 
-    return gtfsfeeds_df
\ No newline at end of file
+    return gtfsfeeds_df
+
+# helper functions
+def _check_if_index_name_in_cols(df):
+    cols = df.columns.values
+    iname = df.index.name
+    return (iname in cols)

From b7c32d69e91b3f3f36e827f0bd65382b459a4e82 Mon Sep 17 00:00:00 2001
From: Kuan Butts <kuanbutts@gmail.com>
Date: Thu, 20 Apr 2017 15:30:25 -0700
Subject: [PATCH 2/4] set 'index' column if _check_if_index_name_in_cols is
 true to ensure no snag at set_index on line 306

---
 urbanaccess/gtfs/network.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py
index 71e49de..f1f53c6 100644
--- a/urbanaccess/gtfs/network.py
+++ b/urbanaccess/gtfs/network.py
@@ -292,8 +292,11 @@ def interpolatestoptimes(stop_times_df, calendar_selected_trips_df, day):
 
     # Need to check if existing index in column names and drop if so (else
     # a ValueError where Pandas can't insert b/c col already exists will occur)
-    drop_bool = False
     if _check_if_index_name_in_cols(df_for_interpolation):
+        # move the current index to own col named 'index'
+        col_name_to_copy = df_for_interpolation.index.name
+        col_to_copy = df_for_interpolation[col_name_to_copy].copy()
+        df_for_interpolation['index'] = col_to_copy
         drop_bool = True
     df_for_interpolation.reset_index(inplace=True, drop=drop_bool)
 
@@ -303,9 +306,19 @@ def interpolatestoptimes(stop_times_df, calendar_selected_trips_df, day):
     interpolated_df.set_index('index', inplace=True)
     interpolated_times = interpolated_df[['departure_time_sec_interpolate']]
 
-    final_stop_times_df = pd.merge(stop_times_df, interpolated_times,
-                                   how='left', left_index=True,
-                                   right_index=True, sort=False, copy=False)
+    # default value for final_stop_times
+    final_stop_times_df = stop_times_df
+    
+    # if empty just duplicate departure_time_sec col
+    if interpolated_times.empty:
+        departures = final_stop_times_df['departure_time_sec'].copy()
+        final_stop_times_df['departure_time_sec_interpolate'] = departures
+    
+    # if df not empty, override the default final_stop_times with merge result
+    else:
+        final_stop_times_df = pd.merge(stop_times_df, interpolated_times,
+                                       how='left', left_index=True,
+                                       right_index=True, sort=False, copy=False)
 
     # fill in nulls in interpolated departure time column using trips that did not need interpolation in order to create
     # one column with both original and interpolated times

From 79791899dbd7ebefe1f84542a5ef423254e3f958 Mon Sep 17 00:00:00 2001
From: Kuan Butts <kuanbutts@gmail.com>
Date: Thu, 20 Apr 2017 15:33:41 -0700
Subject: [PATCH 3/4] dropped interpolated_times empty check; it
 overcomplicates the function just to skip the merge step, when there really
 are no significant perf gains to be achieved through this

---
 urbanaccess/gtfs/network.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py
index f1f53c6..21339ed 100644
--- a/urbanaccess/gtfs/network.py
+++ b/urbanaccess/gtfs/network.py
@@ -306,19 +306,9 @@ def interpolatestoptimes(stop_times_df, calendar_selected_trips_df, day):
     interpolated_df.set_index('index', inplace=True)
     interpolated_times = interpolated_df[['departure_time_sec_interpolate']]
 
-    # default value for final_stop_times
-    final_stop_times_df = stop_times_df
-    
-    # if empty just duplicate departure_time_sec col
-    if interpolated_times.empty:
-        departures = final_stop_times_df['departure_time_sec'].copy()
-        final_stop_times_df['departure_time_sec_interpolate'] = departures
-    
-    # if df not empty, override the default final_stop_times with merge result
-    else:
-        final_stop_times_df = pd.merge(stop_times_df, interpolated_times,
-                                       how='left', left_index=True,
-                                       right_index=True, sort=False, copy=False)
+    final_stop_times_df = pd.merge(stop_times_df, interpolated_times,
+                                   how='left', left_index=True,
+                                   right_index=True, sort=False, copy=False)
 
     # fill in nulls in interpolated departure time column using trips that did not need interpolation in order to create
     # one column with both original and interpolated times

From cd254626e396ebac56a165f9093f45e5bb8910a0 Mon Sep 17 00:00:00 2001
From: Kuan Butts <kuanbutts@gmail.com>
Date: Thu, 20 Apr 2017 15:36:52 -0700
Subject: [PATCH 4/4] re-add drop_bool = False

---
 urbanaccess/gtfs/network.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py
index 21339ed..17e6bf2 100644
--- a/urbanaccess/gtfs/network.py
+++ b/urbanaccess/gtfs/network.py
@@ -292,6 +292,7 @@ def interpolatestoptimes(stop_times_df, calendar_selected_trips_df, day):
 
     # Need to check if existing index in column names and drop if so (else
     # a ValueError where Pandas can't insert b/c col already exists will occur)
+    drop_bool = False
     if _check_if_index_name_in_cols(df_for_interpolation):
         # move the current index to own col named 'index'
         col_name_to_copy = df_for_interpolation.index.name