From d740d356884999922e8b87e258734798f4ebb0e1 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Oct 2023 18:10:29 +0000 Subject: [PATCH] remove files in separate script --- gtfs_funnel/cleanup.py | 29 +++++++++++ gtfs_funnel/logs/download_data.log | 80 ------------------------------ 2 files changed, 29 insertions(+), 80 deletions(-) create mode 100644 gtfs_funnel/cleanup.py diff --git a/gtfs_funnel/cleanup.py b/gtfs_funnel/cleanup.py new file mode 100644 index 000000000..2defad9bb --- /dev/null +++ b/gtfs_funnel/cleanup.py @@ -0,0 +1,29 @@ +""" +Remove staged files. + +Do this in a separate script in case we don't want to run. +""" +import gcsfs + +fs = gcsfs.GCSFileSystem() + +def if_exists_then_delete(filepath): + if fs.exists(filepath): + if fs.isdir(filepath): + fs.rm(filepath, recursive=True) + else: + fs.rm(filepath) + + return + + +if __name__ == "__main__": + + from update_vars import analysis_date_list, CONFIG_DICT + + for analysis_date in analysis_date_list: + + INPUT_FILE = CONFIG_DICT["usable_vp_file"] + if_exists_then_delete(f"{SEGMENT_GCS}{INPUT_FILE}_{analysis_date}_stage") + if_exists_then_delete(f"{SEGMENT_GCS}vp_direction_{analysis_date}.parquet") + \ No newline at end of file diff --git a/gtfs_funnel/logs/download_data.log b/gtfs_funnel/logs/download_data.log index 57d95f9fa..9e85395d4 100644 --- a/gtfs_funnel/logs/download_data.log +++ b/gtfs_funnel/logs/download_data.log @@ -1,83 +1,3 @@ -2023-05-31 16:19:00.351 | INFO | __main__::59 - *********** Download trips data *********** -2023-05-31 16:19:26.201 | INFO | __main__::87 - execution time: 0:00:27.714166 -2023-06-15 09:19:20.202 | INFO | __main__::49 - Analysis date: 2023-06-14 -2023-06-15 09:19:22.081 | INFO | __main__::56 - # operators to run: 203 -2023-06-15 09:19:22.082 | INFO | __main__::59 - *********** Download trips data *********** -2023-06-15 09:19:45.463 | INFO | __main__::87 - execution time: 0:00:25.245031 -2023-06-15 09:20:00.459 | INFO | __main__::24 - Analysis date: 2023-06-14 -2023-06-15 09:20:02.195 | INFO | __main__::31 - # operators to run: 203 -2023-06-15 09:20:02.195 | INFO | __main__::34 - *********** Download stops data *********** -2023-06-15 09:20:24.009 | INFO | __main__::66 - execution time: 0:00:23.548903 -2023-06-15 09:21:01.659 | INFO | __main__::24 - Analysis date: 2023-06-14 -2023-06-15 09:21:03.413 | INFO | __main__::31 - # operators to run: 203 -2023-06-15 09:21:03.414 | INFO | __main__::35 - *********** Download routelines data *********** -2023-06-15 09:27:48.594 | INFO | __main__::65 - execution time: 0:06:46.933896 -2023-06-15 09:28:04.716 | INFO | __main__::25 - Analysis date: 2023-06-14 -2023-06-15 09:28:06.960 | INFO | __main__::33 - # operators to run: 166 -2023-06-15 09:28:06.961 | INFO | __main__::37 - *********** Download st data *********** -2023-06-15 09:29:37.228 | INFO | __main__::60 - execution time: 0:01:32.510767 -2023-06-15 10:22:38.014 | INFO | __main__::59 - *********** Download trips data *********** -2023-06-15 10:23:03.570 | INFO | __main__::87 - execution time: 0:00:27.442476 -2023-06-23 12:13:06.484 | INFO | __main__::24 - Analysis date: 2023-06-14 -2023-06-23 12:13:08.443 | INFO | __main__::31 - # operators to run: 203 -2023-06-23 12:13:08.443 | INFO | __main__::35 - *********** Download routelines data *********** -2023-06-23 12:25:07.216 | INFO | __main__::65 - execution time: 0:12:00.690905 -2023-06-23 13:02:04.303 | INFO | __main__::24 - Analysis date: 2023-06-14 -2023-06-23 13:02:06.530 | INFO | __main__::31 - # operators to run: 203 -2023-06-23 13:02:06.532 | INFO | __main__::35 - *********** Download routelines data *********** -2023-06-23 13:06:07.144 | INFO | __main__::65 - execution time: 0:04:02.810387 -2023-07-13 10:12:38.799 | INFO | __main__::49 - Analysis date: 2023-07-12 -2023-07-13 10:12:41.595 | INFO | __main__::56 - # operators to run: 203 -2023-07-13 10:12:41.596 | INFO | __main__::59 - *********** Download trips data *********** -2023-07-13 10:14:06.883 | INFO | __main__::24 - Analysis date: 2023-07-12 -2023-07-13 10:14:08.804 | INFO | __main__::31 - # operators to run: 203 -2023-07-13 10:14:08.805 | INFO | __main__::34 - *********** Download stops data *********** -2023-07-13 10:14:28.662 | INFO | __main__::66 - execution time: 0:00:21.778023 -2023-07-13 10:14:44.916 | INFO | __main__::24 - Analysis date: 2023-07-12 -2023-07-13 10:14:46.410 | INFO | __main__::31 - # operators to run: 203 -2023-07-13 10:14:46.411 | INFO | __main__::35 - *********** Download routelines data *********** -2023-07-13 10:17:26.524 | INFO | __main__::65 - execution time: 0:02:41.607516 -2023-07-13 12:08:32.427 | INFO | __main__::49 - Analysis date: 2023-07-12 -2023-07-13 12:08:34.263 | INFO | __main__::56 - # operators to run: 203 -2023-07-13 12:08:34.264 | INFO | __main__::59 - *********** Download trips data *********** -2023-07-13 12:08:59.386 | INFO | __main__::89 - execution time: 0:00:26.926539 -2023-07-13 12:10:33.946 | INFO | __main__::25 - Analysis date: 2023-07-12 -2023-07-13 12:10:35.732 | INFO | __main__::33 - # operators to run: 176 -2023-07-13 12:10:35.733 | INFO | __main__::37 - *********** Download st data *********** -2023-07-13 12:12:24.693 | INFO | __main__::60 - execution time: 0:01:50.746267 -202 -2023-08-18 12:14:19.213 | INFO | __main__::49 - Analysis date: 2023-08-16 -2023-08-18 12:14:22.015 | INFO | __main__::56 - # operators to run: 202 -2023-08-18 12:14:22.018 | INFO | __main__::59 - *********** Download trips data *********** -2023-08-18 12:14:45.989 | INFO | __main__::89 - execution time: 0:00:26.775413 -2023-08-18 12:15:03.085 | INFO | __main__::24 - Analysis date: 2023-08-16 -2023-08-18 12:15:04.983 | INFO | __main__::31 - # operators to run: 202 -2023-08-18 12:15:04.984 | INFO | __main__::34 - *********** Download stops data *********** -2023-08-18 12:15:16.711 | INFO | __main__::66 - execution time: 0:00:13.625380 -2023-08-18 12:15:31.459 | INFO | __main__::24 - Analysis date: 2023-08-16 -2023-08-18 12:15:33.142 | INFO | __main__::31 - # operators to run: 202 -2023-08-18 12:15:33.143 | INFO | __main__::35 - *********** Download routelines data *********** -2023-08-18 12:17:49.517 | INFO | __main__::65 - execution time: 0:02:18.055453 -2023-08-18 12:18:05.115 | INFO | __main__::25 - Analysis date: 2023-08-16 -2023-08-18 12:18:06.855 | INFO | __main__::33 - # operators to run: 178 -2023-08-18 12:18:06.856 | INFO | __main__::37 - *********** Download st data *********** -2023-08-18 12:19:34.455 | INFO | __main__::60 - execution time: 0:01:29.339905 -2023-08-24 12:45:30.584 | INFO | __main__::49 - Analysis date: 2023-08-15 -2023-08-24 12:45:33.035 | INFO | __main__::56 - # operators to run: 202 -2023-08-24 12:45:33.036 | INFO | __main__::59 - *********** Download trips data *********** -2023-08-24 12:45:58.618 | INFO | __main__::89 - execution time: 0:00:28.030813 -2023-08-24 12:46:14.946 | INFO | __main__::24 - Analysis date: 2023-08-15 -2023-08-24 12:46:17.017 | INFO | __main__::31 - # operators to run: 202 -2023-08-24 12:46:17.017 | INFO | __main__::34 - *********** Download stops data *********** -2023-08-24 12:46:29.804 | INFO | __main__::66 - execution time: 0:00:14.858181 -2023-08-24 12:46:45.259 | INFO | __main__::24 - Analysis date: 2023-08-15 -2023-08-24 12:46:47.081 | INFO | __main__::31 - # operators to run: 202 -2023-08-24 12:46:47.081 | INFO | __main__::35 - *********** Download routelines data *********** -2023-08-24 12:49:07.495 | INFO | __main__::65 - execution time: 0:02:22.235156 -2023-08-24 12:49:21.834 | INFO | __main__::25 - Analysis date: 2023-08-15 -2023-08-24 12:49:23.160 | INFO | __main__::33 - # operators to run: 180 -2023-08-24 12:49:23.161 | INFO | __main__::37 - *********** Download st data *********** -2023-08-24 12:50:56.588 | INFO | __main__::60 - execution time: 0:01:34.753600 2023-09-14 10:02:47.246 | INFO | __main__::49 - Analysis date: 2023-09-13 2023-09-14 10:02:50.473 | INFO | __main__::56 - # operators to run: 201 2023-09-14 10:02:50.474 | INFO | __main__::59 - *********** Download trips data ***********