Skip to content

Commit

Permalink
Feature/disable uploads (#13)
Browse files Browse the repository at this point in the history
* Revised model so that an optional command line argument can disable uploading of the model log and output rasters. This makes test runs in local Docker easier because the model won't be trying to upload rasters after each step, which can be very slow. It also helps users who want to run the model but don't have an s3 account. The default is still to upload results — the new flag deactivates uploads. (Note: This doesn't fully cut the connection with s3. If input files aren't locally available, the model still tries to download inputs from s3.)
Ran the model locally by stage and in run_full_model.py to make sure uploads were indeed blocked at every step and the model still ran. It seemed fine.

* Had to fix a few things to make the model work with uploads again. Also changed some run_full_model.py command line arguments to be simple flags rather than requiring the user to write true or false.
  • Loading branch information
dagibbs22 authored Mar 31, 2021
1 parent 28fc2b4 commit 7e87b0d
Show file tree
Hide file tree
Showing 39 changed files with 685 additions and 646 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
RUN git config --global user.email [email protected]

## Check out the branch that I'm currently using for model development
#RUN git checkout model_v_1.2.0
#RUN git checkout model_v_1.2.1
#
## Makes sure the latest version of the current branch is downloaded
#RUN git pull origin model_v_1.2.0
#RUN git pull origin model_v_1.2.1

## Compile C++ scripts
#RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal && \
Expand Down
16 changes: 8 additions & 8 deletions analyses/aggregate_results_to_4_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

# Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
# which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
def rewindow(tile):
def rewindow(tile, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -127,7 +127,7 @@ def rewindow(tile):
uu.print_log("No mangrove tile found for {}".format(tile_id))

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type))
uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type), no_upload)


# Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
Expand All @@ -136,7 +136,7 @@ def rewindow(tile):
# 0.1x0.1 degree resolution (approximately 10m in the tropics).
# Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
# The 0.1x0.1 degree tile is output.
def aggregate(tile, thresh, sensit_type):
def aggregate(tile, thresh, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -284,12 +284,12 @@ def aggregate(tile, thresh, sensit_type):
# aggregated.close()

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type))
uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type), no_upload)


# Calculates the percent difference between the standard model's net flux output
# and the sensitivity model's net flux output
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand All @@ -315,11 +315,11 @@ def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
uu.log_subprocess_output_full(cmd)

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload)


# Maps where the sources stay sources, sinks stay sinks, sources become sinks, and sinks become sources
def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -372,4 +372,4 @@ def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type):


# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload)
4 changes: 2 additions & 2 deletions analyses/create_supplementary_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import constants_and_names as cn
import universal_util as uu

def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type):
def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload):

# start time
start = datetime.datetime.now()
Expand Down Expand Up @@ -145,4 +145,4 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit
uu.print_log(" Output tiles created for {}...".format(tile_id))

# Prints information about the tile that was just processed
uu.end_of_fx_summary(start, tile_id, output_patterns[0])
uu.end_of_fx_summary(start, tile_id, output_patterns[0], no_upload)
63 changes: 0 additions & 63 deletions analyses/loss_in_raster.py

This file was deleted.

45 changes: 28 additions & 17 deletions analyses/mp_aggregate_results_to_4_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
sys.path.append(os.path.join(cn.docker_app,'analyses'))
import aggregate_results_to_4_km

def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None):
def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None, no_upload = None):

os.chdir(cn.docker_base_dir)

Expand All @@ -44,15 +44,15 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux

# Files to download for this script
download_dict = {
# cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
# cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
cn.net_flux_dir: [cn.pattern_net_flux]
}

# Checks whether the canopy cover argument is valid
if thresh < 0 or thresh > 99:
uu.exception_log('Invalid tcd. Please provide an integer between 0 and 99.')
uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.')


# Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
Expand Down Expand Up @@ -131,7 +131,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
processes = 8
uu.print_log('Rewindow max processors=', processes)
pool = multiprocessing.Pool(processes)
pool.map(aggregate_results_to_4_km.rewindow, tile_list)
pool.map(partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload), tile_list)
# Added these in response to error12: Cannot allocate memory error.
# This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
# Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
Expand All @@ -141,7 +141,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
# # For single processor use
# for tile in tile_list:
#
# aggregate_results_to_4_km.rewindow(tile)
#   aggregate_results_to_4_km.rewindow(tile, no_upload)

# Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
# and sums those values in each 400x400 pixel window.
Expand All @@ -159,14 +159,15 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
processes = 8
uu.print_log('Conversion to per pixel and aggregate max processors=', processes)
pool = multiprocessing.Pool(processes)
pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type), tile_list)
pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type,
no_upload=no_upload), tile_list)
pool.close()
pool.join()

# # For single processor use
# for tile in tile_list:
#
# aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type)
# aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

# Makes a vrt of all the output 10x10 tiles (10 km resolution)
out_vrt = "{}_0_4deg.vrt".format(pattern)
Expand Down Expand Up @@ -207,10 +208,11 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
uu.log_subprocess_output_full(cmd)


uu.print_log("Tiles processed. Uploading to s3 now...")
# If no_upload flag is not activated, output is uploaded
if not no_upload:

# Uploads all output tiles to s3
uu.upload_final_set(output_dir_list[0], out_pattern)
uu.print_log("Tiles processed. Uploading to s3 now...")
uu.upload_final_set(output_dir_list[0], out_pattern)

# Cleans up the folder before starting on the next raster type
vrtList = glob.glob('*vrt')
Expand Down Expand Up @@ -254,12 +256,16 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
uu.print_log('Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.')

uu.print_log("Creating map of percent difference between standard and {} net flux".format(sensit_type))
aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff)
aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

uu.print_log("Creating map of which pixels change sign and which stay the same between standard and {}".format(sensit_type))
aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change)
aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

# If no_upload flag is not activated, output is uploaded
if not no_upload:

uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff)
uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change)

else:

Expand All @@ -279,18 +285,23 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux
help='Tree cover density threshold above which pixels will be included in the aggregation.')
parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False,
help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map')
parser.add_argument('--no-upload', '-nu', action='store_true',
help='Disables uploading of outputs to s3')
args = parser.parse_args()
sensit_type = args.model_type
tile_id_list = args.tile_id_list
std_net_flux = args.std_net_flux_aggreg
thresh = args.tcd_threshold
thresh = int(thresh)
no_upload = args.no_upload

# Create the output log
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux)
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux,
no_upload=no_upload)

# Checks whether the sensitivity analysis and tile_id_list arguments are valid
uu.check_sensit_type(sensit_type)
tile_id_list = uu.tile_id_list_check(tile_id_list)

mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh, std_net_flux=std_net_flux)
mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh,
std_net_flux=std_net_flux, no_upload=no_upload)
27 changes: 17 additions & 10 deletions analyses/mp_create_supplementary_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
sys.path.append(os.path.join(cn.docker_app,'analyses'))
import create_supplementary_outputs

def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, no_upload = None):

os.chdir(cn.docker_base_dir)

Expand All @@ -45,7 +45,7 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):

# Files to download for this script
download_dict = {
# cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
cn.net_flux_dir: [cn.pattern_net_flux]
}
Expand Down Expand Up @@ -130,7 +130,7 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
elif "net_flux" in input_pattern:
output_patterns = output_pattern_list[6:9]
else:
uu.exception_log("No output patterns found for input pattern. Please check.")
uu.exception_log(no_upload, "No output patterns found for input pattern. Please check.")

uu.print_log("Input pattern:", input_pattern)
uu.print_log("Output patterns:", output_patterns)
Expand All @@ -144,13 +144,13 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
uu.print_log("Creating derivative outputs for {0} with {1} processors...".format(input_pattern, processes))
pool = multiprocessing.Pool(processes)
pool.map(partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern,
output_patterns=output_patterns, sensit_type=sensit_type), tile_id_list_input)
output_patterns=output_patterns, sensit_type=sensit_type, no_upload=no_upload), tile_id_list_input)
pool.close()
pool.join()

# # For single processor use
# for tile_id in tile_id_list_input:
# create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type)
# create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload)

# Checks the two forest extent output tiles created from each input tile for whether there is data in them.
# Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
Expand All @@ -171,9 +171,12 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
pool.close()
pool.join()

# Uploads output tiles to s3
for i in range(0, len(output_dir_list)):
uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

# If no_upload flag is not activated, output is uploaded
if not no_upload:

for i in range(0, len(output_dir_list)):
uu.upload_final_set(output_dir_list[i], output_pattern_list[i])


if __name__ == '__main__':
Expand All @@ -187,16 +190,20 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None):
help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
parser.add_argument('--run-date', '-d', required=False,
help='Date of run. Must be format YYYYMMDD.')
parser.add_argument('--no-upload', '-nu', action='store_true',
help='Disables uploading of outputs to s3')
args = parser.parse_args()
sensit_type = args.model_type
tile_id_list = args.tile_id_list
run_date = args.run_date
no_upload = args.no_upload

# Create the output log
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date)
uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload)

# Checks whether the sensitivity analysis and tile_id_list arguments are valid
uu.check_sensit_type(sensit_type)
tile_id_list = uu.tile_id_list_check(tile_id_list)

mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date)
mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list,
run_date=run_date, no_upload=no_upload)
Loading

0 comments on commit 7e87b0d

Please sign in to comment.