Skip to content

Commit

Permalink
Feature/disable uploads (#13)
Browse files Browse the repository at this point in the history
* Revised model so that an optional command line argument can disable uploading of the model log and output rasters. This makes test runs in local Docker easier because the model won't be trying to upload rasters after each step, which can be very slow. It also helps users who want to run the model but don't have an s3 account. The default is still to upload results — the new flag deactivates uploads. (Note: This doesn't fully cut the connection with s3. If input files aren't locally available, the model still tries to download inputs from s3.)
Ran the model locally by stage and in run_full_model.py to make sure uploads were indeed blocked at every step and the model still ran. It seemed fine.

* Had to fix a few things to make the model work with uploads again. Also changed some run_full_model.py command line arguments to be simple flags rather than requiring the user to write true or false.
  • Loading branch information
dagibbs22 authored Mar 31, 2021
1 parent 28fc2b4 commit 7e87b0d
Show file tree
Hide file tree
Showing 39 changed files with 685 additions and 646 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
RUN git config --global user.email [email protected]

## Check out the branch that I'm currently using for model development
#RUN git checkout model_v_1.2.0
#RUN git checkout model_v_1.2.1
#
## Makes sure the latest version of the current branch is downloaded
#RUN git pull origin model_v_1.2.0
#RUN git pull origin model_v_1.2.1

## Compile C++ scripts
#RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal && \
Expand Down
16 changes: 8 additions & 8 deletions analyses/aggregate_results_to_4_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

# Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
# which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
def rewindow(tile):
def rewindow(tile, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -127,7 +127,7 @@ def rewindow(tile):
uu.print_log("No mangrove tile found for {}".format(tile_id))

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type))
uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type), no_upload)


# Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
Expand All @@ -136,7 +136,7 @@ def rewindow(tile):
# 0.1x0.1 degree resolution (approximately 10m in the tropics).
# Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
# The 0.1x0.1 degree tile is output.
def aggregate(tile, thresh, sensit_type):
def aggregate(tile, thresh, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -284,12 +284,12 @@ def aggregate(tile, thresh, sensit_type):
# aggregated.close()

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type))
uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type), no_upload)


# Calculates the percent difference between the standard model's net flux output
# and the sensitivity model's net flux output
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand All @@ -315,11 +315,11 @@ def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
uu.log_subprocess_output_full(cmd)

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload)


# Maps where the sources stay sources, sinks stay sinks, sources become sinks, and sinks become sources
def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -372,4 +372,4 @@ def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type):


# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload)
4 changes: 2 additions & 2 deletions analyses/create_supplementary_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import constants_and_names as cn
import universal_util as uu

def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type):
def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -145,4 +145,4 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit
uu.print_log(" Output tiles created for {}...".format(tile_id))

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, output_patterns[0])
uu.end_of_fx_summary(start, tile_id, output_patterns[0], no_upload)
63 changes: 0 additions & 63 deletions analyses/loss_in_raster.py

This file was deleted.

45 changes: 28 additions & 17 deletions analyses/mp_aggregate_results_to_4_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
sys.path.append(os.path.join(cn.docker_app,'analyses'))
import aggregate_results_to_4_km

def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None):
def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None, no_upload = None):

os.chdir(cn.docker_base_dir)

Expand All @@ -44,15 +44,15 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux

# Files to download for this script
download_dict = {
# cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
# cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
cn.net_flux_dir: [cn.pattern_net_flux]
}

# Checks whether the canopy cover argument is valid
if thresh < 0 or thresh > 99:
uu.exception_log('Invalid tcd. Please provide an integer between 0 and 99.')
uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.')


# Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
Expand Down Expand Up @@ -131,7 +131,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
processes = 8
uu.print_log('Rewindow max processors=', processes)
pool = multiprocessing.Pool(processes)
pool.map(aggregate_results_to_4_km.rewindow, tile_list)
pool.map(partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload), tile_list)
# Added these in response to error12: Cannot allocate memory error.
# This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
# Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
Expand All @@ -141,7 +141,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
# # For single processor use
# for tile in tile_list:
#
# aggregate_results_to_4_km.rewindow(tile)
#   aggregate_results_to_4_km.rewindow(tile, no_upload)

# Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
# and sums those values in each 400x400 pixel window.
Expand All @@ -159,14 +159,15 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
processes = 8
uu.print_log('Conversion to per pixel and aggregate max processors=', processes)
pool = multiprocessing.Pool(processes)
pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type), tile_list)
pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type,
no_upload=no_upload), tile_list)
pool.close()
pool.join()

# # For single processor use
# for tile in tile_list:
#
# aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type)
# aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

# Makes a vrt of all the output 10x10 tiles (10 km resolution)
out_vrt = "{}_0_4deg.vrt".format(pattern)
Expand Down Expand Up @@ -207,10 +208,11 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
uu.log_subprocess_output_full(cmd)


uu.print_log("Tiles processed. Uploading to s3 now...")
# If no_upload flag is not activated, output is uploaded
if not no_upload:

# Uploads all output tiles to s3
uu.upload_final_set(output_dir_list[0], out_pattern)
uu.print_log("Tiles processed. Uploading to s3 now...")
uu.upload_final_set(output_dir_list[0], out_pattern)

# Cleans up the folder before starting on the next raster type
vrtList = glob.glob('*vrt')
Expand Down Expand Up @@ -254,12 +256,16 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
uu.print_log('Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.')

uu.print_log("Creating map of percent difference between standard and {} net flux".format(sensit_type))
aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff)
aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

uu.print_log("Creating map of which pixels change sign and which stay the same between standard and {}".format(sensit_type))
aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change)
aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

# If no_upload flag is not activated, output is uploaded
if not no_upload:

uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change)

else:

Expand All @@ -279,18 +285,23 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
help='Tree cover density threshold above which pixels will be included in the aggregation.')
parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False,
help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map')
parser.add_argument('--no-upload', '-nu', action='store_true',
help='Disables uploading of outputs to s3')
args = parser.parse_args()
sensit_type = args.model_type
tile_id_list = args.tile_id_list
std_net_flux = args.std_net_flux_aggreg
thresh = args.tcd_threshold
thresh = int(thresh)
no_upload = args.no_upload

# Create the output log
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux)
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux,
no_upload=no_upload)

# Checks whether the sensitivity analysis and tile_id_list arguments are valid
uu.check_sensit_type(sensit_type)
tile_id_list = uu.tile_id_list_check(tile_id_list)

mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh, std_net_flux=std_net_flux)
mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh,
std_net_flux=std_net_flux, no_upload=no_upload)
27 changes: 17 additions & 10 deletions analyses/mp_create_supplementary_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
sys.path.append(os.path.join(cn.docker_app,'analyses'))
import create_supplementary_outputs

def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, no_upload = None):

os.chdir(cn.docker_base_dir)

Expand All @@ -45,7 +45,7 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):

# Files to download for this script
download_dict = {
# cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
cn.net_flux_dir: [cn.pattern_net_flux]
}
Expand Down Expand Up @@ -130,7 +130,7 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
elif "net_flux" in input_pattern:
output_patterns = output_pattern_list[6:9]
else:
uu.exception_log("No output patterns found for input pattern. Please check.")
uu.exception_log(no_upload, "No output patterns found for input pattern. Please check.")

uu.print_log("Input pattern:", input_pattern)
uu.print_log("Output patterns:", output_patterns)
Expand All @@ -144,13 +144,13 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
uu.print_log("Creating derivative outputs for {0} with {1} processors...".format(input_pattern, processes))
pool = multiprocessing.Pool(processes)
pool.map(partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern,
output_patterns=output_patterns, sensit_type=sensit_type), tile_id_list_input)
output_patterns=output_patterns, sensit_type=sensit_type, no_upload=no_upload), tile_id_list_input)
pool.close()
pool.join()

# # For single processor use
# for tile_id in tile_id_list_input:
# create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type)
# create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload)

# Checks the two forest extent output tiles created from each input tile for whether there is data in them.
# Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
Expand All @@ -171,9 +171,12 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
pool.close()
pool.join()

# Uploads output tiles to s3
for i in range(0, len(output_dir_list)):
uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

# If no_upload flag is not activated, output is uploaded
if not no_upload:

for i in range(0, len(output_dir_list)):
uu.upload_final_set(output_dir_list[i], output_pattern_list[i])


if __name__ == '__main__':
Expand All @@ -187,16 +190,20 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
parser.add_argument('--run-date', '-d', required=False,
help='Date of run. Must be format YYYYMMDD.')
parser.add_argument('--no-upload', '-nu', action='store_true',
help='Disables uploading of outputs to s3')
args = parser.parse_args()
sensit_type = args.model_type
tile_id_list = args.tile_id_list
run_date = args.run_date
no_upload = args.no_upload

# Create the output log
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date)
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload)

# Checks whether the sensitivity analysis and tile_id_list arguments are valid
uu.check_sensit_type(sensit_type)
tile_id_list = uu.tile_id_list_check(tile_id_list)

mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date)
mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list,
run_date=run_date, no_upload=no_upload)
Loading

0 comments on commit 7e87b0d

Please sign in to comment.