From e99bc064aaf958a21e0b39284cba99523a99f558 Mon Sep 17 00:00:00 2001
From: "David.Gibbs"
Date: Mon, 22 Aug 2022 16:41:05 -0400
Subject: [PATCH 1/9] Develop branch.

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 9176b346..b941736f 100644
--- a/readme.md
+++ b/readme.md
@@ -208,7 +208,7 @@ every script. Thus, the table below also explains the potential arguments for th
 The user can control what model components are run to some extent and set the date part of the output directories.
 The emissions C++ code has to be compiled before running the master script (see below).
 Preparatory scripts like creating soil carbon tiles or mangrove tiles are not included in the master script because
-they are run very infrequently. 
+they are run very infrequently.
 
 | Argument | Short argument | Required/Optional | Relevant stage | Description |
 | -------- | ----- | ----------- | ------- | ------ |

From 1c1571365741c2111ae65ab62ba0febf1b4098fb Mon Sep 17 00:00:00 2001
From: dagibbs22
Date: Mon, 22 Aug 2022 16:43:45 -0400
Subject: [PATCH 2/9] Feature/python 3 8 update (#25)

* docker-compose works fine and I can enter the docker container locally. Haven't tried running the model with updated GDAL and Python yet.

* Successfully installs the required Python packages. Haven't tried running a test tile yet.

* Runs test tile 00N_000E. Didn't check that outputs were correct but did verify that the output rasters load and have values in ArcMap.
---
 Dockerfile        | 27 +++++++++++++++------------
 requirements.txt  | 26 +++++++++++++-------------
 run_full_model.py |  5 ++++-
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3c6542bb..ac99229a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,9 @@
-# Use osgeo GDAL image. It builds off Ubuntu 18.04 and uses GDAL 3.0.4
-FROM osgeo/gdal:ubuntu-small-3.0.4
+# Use osgeo GDAL image.
+# Ubuntu 20.04.4 LTS, Python 3.8.10, GDAL 3.4.2
+FROM osgeo/gdal:ubuntu-small-3.4.2
 
 # # Use this if downloading hdf files for burn year analysis
-# FROM osgeo/gdal:ubuntu-full-3.0.4
+# FROM osgeo/gdal:ubuntu-full-3.4.2
 
 ENV DIR=/usr/local/app
 ENV TMP=/usr/local/tmp
 ENV SECRETS_PATH /usr/secrets
 
 RUN ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime
 
 # Install dependencies
+# PostGIS extension version based on https://computingforgeeks.com/how-to-install-postgis-on-ubuntu-linux/
 RUN apt-get update -y && apt-get install -y \
     make \
     automake \
     g++ \
     gcc \
     libpq-dev \
-    postgresql-10 \
-    postgresql-server-dev-10 \
-    postgresql-contrib-10 \
-    postgresql-10-postgis-2.4 \
+    postgresql-12 \
+    postgresql-server-dev-12 \
+    postgresql-contrib-12 \
+    postgresql-12-postgis-3 \
     python3-pip \
     wget \
     nano \
 
 ENV PGDATABASE=ubuntu
 
 # Commented out the start/restart commands because even with running them, postgres isn't running when the container is created.
 # So there's no point in starting postgres here if it's not active when the instance opens.
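 # (Hedged sketch of a common alternative, not used in this image: RUN only executes
 # at build time, so a service has to be started again each time the container launches,
 # e.g. via a hypothetical entrypoint script:
 #   COPY docker-entrypoint.sh /usr/local/bin/
 #   ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
 # where docker-entrypoint.sh would run `service postgresql start` and then `exec "$@"`.)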
####################################### -RUN cp pg_hba.conf /etc/postgresql/10/main/ +RUN cp pg_hba.conf /etc/postgresql/12/main/ # RUN pg_ctlcluster 10 main start # RUN service postgresql restart @@ -68,9 +70,9 @@ RUN pip3 install -r requirements.txt # Link gdal libraries RUN cd /usr/include && ln -s ./ gdal -# Somehow, this makes gdal_calc.py accessible from anywhere in the Docker -#https://www.continualintegration.com/miscellaneous-articles/all/how-do-you-troubleshoot-usr-bin-env-python-no-such-file-or-directory/ -RUN ln -s /usr/bin/python3 /usr/bin/python +# # Somehow, this makes gdal_calc.py accessible from anywhere in the Docker +# #https://www.continualintegration.com/miscellaneous-articles/all/how-do-you-troubleshoot-usr-bin-env-python-no-such-file-or-directory/ +# RUN ln -s /usr/bin/python3 /usr/bin/python # Enable ec2 to interact with GitHub RUN git config --global user.email dagibbs22@gmail.com @@ -82,7 +84,8 @@ RUN git config --global user.email dagibbs22@gmail.com #RUN git pull origin model_v_1.2.2 ## Compile C++ scripts -#RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal && \ +RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal +# RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal && \ # g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.exe -lgdal && \ # g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.exe -lgdal && \ # g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.exe -lgdal diff --git a/requirements.txt b/requirements.txt index d1baa6e6..2eb23873 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -cftime~=1.4.1 -awscli~=1.16.50 -boto3~=1.9.40 -botocore~=1.12.40 -netCDF4~=1.4.2 -numpy~=1.15.4 -pandas~=0.23.4 -psycopg2~=2.7.4 -rasterio~=1.1.5 -scipy~=1.1.0 -simpledbf~=0.2.6 -virtualenv~=16.0.0 -xlrd~=1.1.0 +cftime +awscli +boto3 +botocore +netCDF4 +numpy>=1.18.5 +openpyxl +pandas +psycopg2 +rasterio +scipy +simpledbf +virtualenv psutil diff --git a/run_full_model.py b/run_full_model.py index f10b4099..41e46c4d 100644 --- a/run_full_model.py +++ b/run_full_model.py @@ -12,10 +12,13 @@ starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil python run_full_model.py -si -t std -s all -r -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test" +Run 00N_000E in standard model; save intermediate outputs; do not upload outputs to s3; run all model stages; +starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil +python run_full_model.py -si -t std -s all -r -nu -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test" + FULL STANDARD MODEL RUN: Run all tiles in standard model; save intermediate outputs; do upload outputs to s3; run all model stages; starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil python run_full_model.py -si -t std -s all -r -l all -ce loss -p biomass_soil -tcd 30 -ln "Running all 
tiles" - ''' import argparse From e068146ac2905d03461ffa6e7765b321f2e3f36a Mon Sep 17 00:00:00 2001 From: dagibbs22 Date: Fri, 26 Aug 2022 14:35:33 -0400 Subject: [PATCH 3/9] Feature/add testing and linting (#26) * Froze all dependencies in `requirements.txt` to their current version. Also, added testing folder and file but haven't tried using them yet. * Used pylint on mp_create_carbon_pools.py. Addressed pretty much all the messages I wanted to. * Continued delinting in create_carbon_pools. Changed all obvious print statements in mp_create_carbon_pools.py and create_carbon_pools.py to fprint. Added docstrings to each function. Testing carbon pool creation for 00N_000E seems to work fine. * Experimenting with setting some variables as global so that I don't have to pass them as arguments: sensit_type, no_upload, save_intermediates, etc. For saving and modifying variables between files, this page seems to be helpful: https://thewebdev.info/2021/10/19/how-to-use-global-variables-between-files-in-python/#:~:text=To%20use%20global%20variables%20between%20files%20in%20Python%2C%20we%20can,reference%20the%20global%20variable%20directly.&text=We%20import%20the%20settings%20and,Then%20we%20call%20settings. * Testing global variables with no_upload. I seem to be able to reset the global variable from run_full_model.py, including in the log. Need to make sure this is actually carrying through to uploading. * Added global variables to constants_and_names.py and top of run_full_model.py. * Changed run_full_model.py through carbon pool step. * Changed carbon pool creation to use global variables. Decided to have functions pass carbon_pool_extent because it's a key parameter of carbon pool creation. * Changed all model stages to use global variables from the command line. Still testing that I didn't break anything in local runs. * Changed some universal_util.py functions to use the global variables instead of passing arguments to them. * Starting to change print statements to f'' print statements throughout the model. * Changed to f print statements for model extent and forest age category steps. * Changed to f print statements for entire removals model. * Changed to f print statements for carbon, emissions, and analyses. Haven't changed in universal_util or constants_and_names. Haven't checked if everything is working alright. * Changed to f print statements for universal_util.py. Didn't change arguments to gdal commands for the most part, though. * Used pylint on all regular model steps and run_full_model.py. Fixed most message that weren't about importing, too many variables, too many statements, or too many branches. I'll work on those structural issues later. * Testing 00N_000E locally with linting of run_full_model.py and all model stages through net flux. Going to try running it on an ec2 instance now. * 00N_000 works in a full local run and 00N_020E works in a full ec2 run. I've linted enough for now. 
--- .pylintrc | 5 + analyses/aggregate_results_to_4_km.py | 42 +- analyses/create_supplementary_outputs.py | 40 +- analyses/download_tile_set.py | 4 +- analyses/mp_aggregate_results_to_4_km.py | 129 +++--- analyses/mp_create_supplementary_outputs.py | 75 ++-- analyses/mp_net_flux.py | 77 ++-- analyses/mp_tile_statistics.py | 4 +- analyses/net_flux.py | 47 +- burn_date/hansen_burnyear_final.py | 2 +- burn_date/mp_burn_year.py | 6 +- carbon_pools/create_carbon_pools.py | 404 +++++++++-------- carbon_pools/mp_create_carbon_pools.py | 268 +++++------ carbon_pools/mp_create_soil_C.py | 16 +- constants_and_names.py | 31 +- .../continent_ecozone_tiles.py | 0 .../create_inputs_for_C_pools.py | 0 data_prep/model_extent.py | 89 ++-- .../mp_continent_ecozone_tiles.py | 4 +- .../mp_create_inputs_for_C_pools.py | 4 +- data_prep/mp_mangrove_processing.py | 12 +- data_prep/mp_model_extent.py | 104 ++--- .../mp_peatland_processing.py | 4 +- data_prep/mp_plantation_preparation.py | 4 +- data_prep/mp_prep_other_inputs.py | 30 +- data_prep/mp_rewindow_tiles.py | 8 +- .../peatland_processing.py | 2 +- emissions/calculate_gross_emissions.py | 65 ++- emissions/mp_calculate_gross_emissions.py | 159 +++---- removals/US_removal_rates.py | 6 +- ...nual_gain_rate_AGC_BGC_all_forest_types.py | 132 +++--- removals/annual_gain_rate_IPCC_defaults.py | 52 ++- removals/annual_gain_rate_mangrove.py | 12 +- removals/forest_age_category_IPCC.py | 103 +++-- removals/gain_year_count_all_forest_types.py | 265 ++++++----- removals/gross_removals_all_forest_types.py | 57 +-- removals/mp_US_removal_rates.py | 38 +- ...nual_gain_rate_AGC_BGC_all_forest_types.py | 110 ++--- removals/mp_annual_gain_rate_IPCC_defaults.py | 107 ++--- removals/mp_annual_gain_rate_mangrove.py | 35 +- removals/mp_forest_age_category_IPCC.py | 84 ++-- .../mp_gain_year_count_all_forest_types.py | 185 ++++---- .../mp_gross_removals_all_forest_types.py | 109 ++--- requirements.txt | 28 +- run_full_model.py | 419 +++++++++--------- sensitivity_analysis/US_removal_rates.py | 2 +- sensitivity_analysis/legal_AMZ_loss.py | 22 +- sensitivity_analysis/mp_Mekong_loss.py | 5 +- .../mp_Saatchi_biomass_prep.py | 5 +- sensitivity_analysis/mp_US_removal_rates.py | 8 +- sensitivity_analysis/mp_legal_AMZ_loss.py | 52 +-- test/__init__.py | 0 test/carbon_pools/__init__.py | 0 test/carbon_pools/test_carbon_pools.py | 64 +++ universal_util.py | 383 ++++++++-------- 55 files changed, 2114 insertions(+), 1804 deletions(-) create mode 100644 .pylintrc rename {removals => data_prep}/continent_ecozone_tiles.py (100%) rename {carbon_pools => data_prep}/create_inputs_for_C_pools.py (100%) rename {removals => data_prep}/mp_continent_ecozone_tiles.py (96%) rename {carbon_pools => data_prep}/mp_create_inputs_for_C_pools.py (97%) rename {emissions => data_prep}/mp_peatland_processing.py (96%) rename {emissions => data_prep}/peatland_processing.py (99%) create mode 100644 test/__init__.py create mode 100644 test/carbon_pools/__init__.py create mode 100644 test/carbon_pools/test_carbon_pools.py diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..ab782eb3 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,5 @@ +# .pylintrc + +[MASTER] + +disable=line-too-long, redefined-outer-name, invalid-name \ No newline at end of file diff --git a/analyses/aggregate_results_to_4_km.py b/analyses/aggregate_results_to_4_km.py index a97a4db6..3e76389c 100644 --- a/analyses/aggregate_results_to_4_km.py +++ b/analyses/aggregate_results_to_4_km.py @@ -34,7 +34,7 @@ # 0.1x0.1 degree resolution 
(approximately 10m in the tropics). # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha). # The 0.1x0.1 degree tile is output. -def aggregate(tile, thresh, sensit_type, no_upload): +def aggregate(tile, thresh): # start time start = datetime.datetime.now() @@ -45,11 +45,11 @@ def aggregate(tile, thresh, sensit_type, no_upload): xmin, ymin, xmax, ymax = uu.coords(tile_id) # Name of inputs - focal_tile_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type) - pixel_area_rewindow = '{0}_{1}.tif'.format(cn.pattern_pixel_area_rewindow, tile_id) - tcd_rewindow = '{0}_{1}.tif'.format(cn.pattern_tcd_rewindow, tile_id) - gain_rewindow = '{0}_{1}.tif'.format(cn.pattern_gain_rewindow, tile_id) - mangrove_rewindow = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000_rewindow) + focal_tile_rewindow = f'{tile_id}_{tile_type}_rewindow.tif' + pixel_area_rewindow = f'{cn.pattern_pixel_area_rewindow}_{tile_id}.tif' + tcd_rewindow = f'{cn.pattern_tcd_rewindow}_{tile_id}.tif' + gain_rewindow = f'{cn.pattern_gain_rewindow}_{tile_id}.tif' + mangrove_rewindow = f'{tile_id}_{cn.pattern_mangrove_biomass_2000_rewindow}.tif' # Opens input tiles for rasterio in_src = rasterio.open(focal_tile_rewindow) @@ -59,11 +59,11 @@ def aggregate(tile, thresh, sensit_type, no_upload): try: mangrove_src = rasterio.open(mangrove_rewindow) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) + uu.print_log(f' Mangrove tile found for {tile_id}') except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) + uu.print_log(f' No mangrove tile found for {tile_id}') - uu.print_log(" Converting {} to per-pixel values...".format(tile)) + uu.print_log(f' Converting {tile} to per-pixel values...') # Grabs the windows of the tile (stripes) in order to iterate over the entire tif without running out of memory windows = in_src.block_windows(1) @@ -71,7 +71,7 @@ def aggregate(tile, thresh, sensit_type, no_upload): #2D array in which the 0.04x0.04 deg aggregated sums will be stored sum_array = np.zeros([250,250], 'float32') - out_raster = "{0}_{1}_0_04deg.tif".format(tile_id, tile_type) + out_raster = f'{tile_id}_{tile_type}_0_04deg.tif' uu.check_memory() @@ -129,7 +129,7 @@ def aggregate(tile, thresh, sensit_type, no_upload): if cn.pattern_net_flux in tile_type: sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes - uu.print_log(" Creating aggregated tile for {}...".format(tile)) + uu.print_log(f' Creating aggregated tile for {tile}...') # Converts array to the same output type as the raster that is created below sum_array = np.float32(sum_array) @@ -148,7 +148,7 @@ def aggregate(tile, thresh, sensit_type, no_upload): # print(aggregated) # aggregated.update_tags(a="1") # print(aggregated.tags()) - # uu.add_rasterio_tags(aggregated, sensit_type) + # uu.add_rasterio_tags(aggregated) # print(aggregated.tags()) # if cn.pattern_annual_gain_AGC_all_types in tile_type: # aggregated.update_tags(units='Mg aboveground carbon/pixel, where pixels are 0.04x0.04 degrees)', @@ -185,12 +185,12 @@ def aggregate(tile, thresh, sensit_type, no_upload): # aggregated.close() # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, '{}_0_04deg'.format(tile_type), no_upload) + uu.end_of_fx_summary(start, tile_id, f'{tile_type}_0_04deg') # Calculates the percent difference between the standard model's net flux output # and the sensitivity model's net flux output -def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, 
no_upload): +def percent_diff(std_aggreg_flux, sensit_aggreg_flux): # start time start = datetime.datetime.now() @@ -207,7 +207,7 @@ def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): # fine for all the other analyses, though (including legal_Amazon_loss). # Maybe that divide by 0 is throwing off other values now. perc_diff_calc = '--calc=(A-B)/absolute(B)*100' - perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_formatted) + perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_perc_diff, cn.SENSIT_TYPE, date_formatted) perc_diff_outfilearg = '--outfile={}'.format(perc_diff_outfilename) # cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg, # '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] @@ -216,11 +216,11 @@ def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload) + uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux) # Maps where the sources stay sources, sinks stay sinks, sources become sinks, and sinks become sources -def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): +def sign_change(std_aggreg_flux, sensit_aggreg_flux): # start time start = datetime.datetime.now() @@ -240,14 +240,14 @@ def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): sensit_src = rasterio.open(sensit_aggreg_flux) # Creates the sign change raster - dst = rasterio.open('{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_sign_change, sensit_type, date_formatted), 'w', **kwargs) + dst = rasterio.open('{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_sign_change, cn.SENSIT_TYPE, date_formatted), 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) + uu.add_universal_metadata_rasterio(dst) dst.update_tags( key='1=stays net source. 2=stays net sink. 3=changes from net source to net sink. 
4=changes from net sink to net source.') dst.update_tags( - source='Comparison of net flux at 0.04x0.04 degrees from standard model to net flux from {} sensitivity analysis'.format(sensit_type)) + source='Comparison of net flux at 0.04x0.04 degrees from standard model to net flux from {} sensitivity analysis'.format(cn.SENSIT_TYPE)) dst.update_tags( extent='Global') @@ -273,4 +273,4 @@ def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload) + uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux) diff --git a/analyses/create_supplementary_outputs.py b/analyses/create_supplementary_outputs.py index 244cec63..e9b39139 100644 --- a/analyses/create_supplementary_outputs.py +++ b/analyses/create_supplementary_outputs.py @@ -24,7 +24,7 @@ import constants_and_names as cn import universal_util as uu -def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload): +def create_supplementary_outputs(tile_id, input_pattern, output_patterns): # start time start = datetime.datetime.now() @@ -33,18 +33,18 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit tile_id = uu.get_tile_id(tile_id) # Names of inputs - focal_tile = '{0}_{1}.tif'.format(tile_id, input_pattern) - pixel_area = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id) - tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000) + focal_tile = f'{tile_id}_{input_pattern}.tif' + pixel_area = f'{cn.pattern_pixel_area}_{tile_id}.tif' + tcd = f'{cn.pattern_tcd}_{tile_id}.tif' + gain = f'{cn.pattern_gain}_{tile_id}.tif' + mangrove = f'{tile_id}_{cn.pattern_mangrove_biomass_2000}.tif' # Names of outputs. # Requires that output patterns be listed in main script in the correct order for here # (currently, per pixel full extent, per hectare forest extent, per pixel forest extent). 
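    # (For reference, a hedged numpy-style sketch of the per-hectare -> per-pixel
    # conversion behind the per-pixel outputs named below; values are illustrative,
    # the pixel_area raster supplies m2/pixel, and 1 ha = 10,000 m2:
    #   per_pixel = per_hectare * pixel_area / 10000
    # e.g., 2.0 Mg CO2e/ha on a ~770 m2 pixel near the equator -> ~0.154 Mg CO2e/pixel.)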
- per_pixel_full_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[0]) - per_hectare_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[1]) - per_pixel_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[2]) + per_pixel_full_extent = f'{tile_id}_{output_patterns[0]}.tif' + per_hectare_forest_extent = f'{tile_id}_{output_patterns[1]}.tif' + per_pixel_forest_extent = f'{tile_id}_{output_patterns[2]}.tif' # Opens input tiles for rasterio in_src = rasterio.open(focal_tile) @@ -59,11 +59,11 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit try: mangrove_src = rasterio.open(mangrove) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) + uu.print_log(f' Mangrove tile found for {tile_id}') except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) + uu.print_log(f' No mangrove tile found for {tile_id}') - uu.print_log(" Creating outputs for {}...".format(focal_tile)) + uu.print_log(f' Creating outputs for {focal_tile}...') kwargs.update( driver='GTiff', @@ -80,25 +80,25 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit # Adds metadata tags to the output rasters - uu.add_rasterio_tags(per_pixel_full_extent_dst, sensit_type) + uu.add_universal_metadata_rasterio(per_pixel_full_extent_dst) per_pixel_full_extent_dst.update_tags( - units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years)) + units=f'Mg CO2e/pixel over model duration (2001-20{cn.loss_years})') per_pixel_full_extent_dst.update_tags( source='per hectare full model extent tile') per_pixel_full_extent_dst.update_tags( extent='Full model extent: ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') - uu.add_rasterio_tags(per_hectare_forest_extent_dst, sensit_type) + uu.add_universal_metadata_rasterio(per_hectare_forest_extent_dst) per_hectare_forest_extent_dst.update_tags( - units='Mg CO2e/hectare over model duration (2001-20{})'.format(cn.loss_years)) + units=f'Mg CO2e/hectare over model duration (2001-20{cn.loss_years})') per_hectare_forest_extent_dst.update_tags( source='per hectare full model extent tile') per_hectare_forest_extent_dst.update_tags( extent='Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') - uu.add_rasterio_tags(per_pixel_forest_extent_dst, sensit_type) + uu.add_universal_metadata_rasterio(per_pixel_forest_extent_dst) per_pixel_forest_extent_dst.update_tags( - units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years)) + units=f'Mg CO2e/pixel over model duration (2001-20{cn.loss_years})') per_pixel_forest_extent_dst.update_tags( source='per hectare forest model extent tile') per_pixel_forest_extent_dst.update_tags( @@ -143,7 +143,7 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit per_hectare_forest_extent_dst.write_band(1, dst_window_per_hectare_forest_extent, window=window) per_pixel_forest_extent_dst.write_band(1, dst_window_per_pixel_forest_extent, window=window) - uu.print_log(" Output tiles created for {}...".format(tile_id)) + uu.print_log(f' Output tiles created for {tile_id}...') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, output_patterns[0], no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, output_patterns[0]) \ No newline at end of file diff --git a/analyses/download_tile_set.py b/analyses/download_tile_set.py index 
9d174d37..d3a48297 100644 --- a/analyses/download_tile_set.py +++ b/analyses/download_tile_set.py @@ -103,7 +103,7 @@ def download_tile_set(sensit_type, tile_id_list): parser = argparse.ArgumentParser( description='Download model outputs for specific tile') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -114,7 +114,7 @@ def download_tile_set(sensit_type, tile_id_list): run_date = args.run_date # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) diff --git a/analyses/mp_aggregate_results_to_4_km.py b/analyses/mp_aggregate_results_to_4_km.py index e8713e1b..f9e6b81c 100644 --- a/analyses/mp_aggregate_results_to_4_km.py +++ b/analyses/mp_aggregate_results_to_4_km.py @@ -32,7 +32,7 @@ import aggregate_results_to_4_km -def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None, no_upload = None): +def mp_aggregate_results_to_4_km(tile_id_list, thresh, std_net_flux = None): os.chdir(cn.docker_base_dir) @@ -46,32 +46,32 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # Checks whether the canopy cover argument is valid if thresh < 0 or thresh > 99: - uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') + uu.exception_log('Invalid tcd. Please provide an integer between 0 and 99.') # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles - uu.s3_flexible_download(cn.pixel_area_rewindow_dir, cn.pattern_pixel_area_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(cn.pixel_area_rewindow_dir, cn.pattern_pixel_area_rewindow, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent - uu.s3_flexible_download(cn.tcd_rewindow_dir, cn.pattern_tcd_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - uu.s3_flexible_download(cn.gain_rewindow_dir, cn.pattern_gain_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - uu.s3_flexible_download(cn.mangrove_biomass_2000_rewindow_dir, cn.pattern_mangrove_biomass_2000_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(cn.tcd_rewindow_dir, cn.pattern_tcd_rewindow, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) + uu.s3_flexible_download(cn.gain_rewindow_dir, cn.pattern_gain_rewindow, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) + uu.s3_flexible_download(cn.mangrove_biomass_2000_rewindow_dir, cn.pattern_mangrove_biomass_2000_rewindow, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) - uu.print_log("Model outputs to process are:", download_dict) + uu.print_log(f'Model outputs to process are: {download_dict}') # List of output directories. Modified later for sensitivity analysis. # Output pattern is determined later. 
output_dir_list = [cn.output_aggreg_dir] # If the model run isn't the standard one, the output directory is changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Iterates through the types of tiles to be processed @@ -80,12 +80,12 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux download_pattern_name = download_pattern[0] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(dir, sensit_type) + tile_id_list = uu.tile_list_s3(dir, cn.SENSIT_TYPE) # Gets an actual tile id to use as a dummy in creating the actual tile pattern local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, download_pattern_name) @@ -95,12 +95,12 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few # lines later. tile_id = sample_tile_id # a dummy tile id (but it has to be a real tile id). It is removed later. - output_pattern = uu.sensit_tile_rename(sensit_type, tile_id, download_pattern_name) + output_pattern = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, download_pattern_name) pattern = output_pattern[9:-4] # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis - if (sensit_type != 'std') & (sensit_type not in pattern): - uu.print_log("{} not a sensitivity analysis output. Skipping aggregation...".format(pattern) + "\n") + if (cn.SENSIT_TYPE != 'std') & (cn.SENSIT_TYPE not in pattern): + uu.print_log(f'{pattern} not a sensitivity analysis output. Skipping aggregation...', "\n") continue @@ -115,21 +115,22 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif'] # test tiles - uu.print_log("There are {0} tiles to process for pattern {1}".format(str(len(tile_list)), download_pattern_name) + "\n") - uu.print_log("Processing:", dir, "; ", pattern) + uu.print_log(f'There are {str(len(tile_list))} tiles to process for pattern {download_pattern_name}', "\n") + uu.print_log(f'Processing: {dir}; {pattern}') # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 160x160 pixels, # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed. 
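        # (A hedged sketch of what the rewindowing amounts to; the real logic lives in
        # uu.rewindow and its exact options may differ. With GDAL's GTiff creation options:
        #   gdal_translate -co TILED=YES -co BLOCKXSIZE=160 -co BLOCKYSIZE=160 \
        #       00N_000E_net_flux.tif 00N_000E_net_flux_rewindow.tif   # filenames illustrative
        # 160 pixels x 0.00025 degrees/pixel = 0.04 degrees, so each 160x160 window
        # collapses onto exactly one pixel of the aggregated output raster.)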
if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 12 # 12 processors = XXX GB peak else: processes = 16 # 16 processors = XXX GB peak else: processes = 8 - uu.print_log('Rewindow max processors=', processes) + uu.print_log(f'Rewindow max processors= {processes}') pool = multiprocessing.Pool(processes) - pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name, no_upload=no_upload), tile_id_list) + pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name), + tile_id_list) # Added these in response to error12: Cannot allocate memory error. # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory @@ -139,7 +140,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # # For single processor use # for tile_id in tile_id_list: # - # uu.rewindow(tile_id, download_pattern_name,no_upload) + # uu.rewindow(tile_id, download_pattern_name) # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel) @@ -150,30 +151,30 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # The 0.04x0.04 degree tile is output. # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: processes = 12 # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) else: processes = 8 - uu.print_log('Conversion to per pixel and aggregate max processors=', processes) + uu.print_log(f'Conversion to per pixel and aggregate max processors={processes}') pool = multiprocessing.Pool(processes) - pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type, - no_upload=no_upload), tile_list) + pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh), + tile_list) pool.close() pool.join() # # For single processor use # for tile in tile_list: # - # aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload) + # aggregate_results_to_4_km.aggregate(tile, thresh) # Makes a vrt of all the output 10x10 tiles (10 km resolution) - out_vrt = "{}_0_04deg.vrt".format(pattern) + out_vrt = f'{pattern}_0_04deg.vrt' os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_04deg*.tif'.format(out_vrt, pattern)) # Creates the output name for the 10km map - out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type) + out_pattern = uu.name_aggregated_output(download_pattern_name, thresh) uu.print_log(out_pattern) # Produces a single raster of all the 10x10 tiles (0.04 degree resolution) @@ -184,7 +185,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # Adds metadata tags to output rasters - uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern), sensit_type) + uu.add_universal_metadata_gdal(f'{out_pattern}.tif') # Units are different for annual removal factor, so metadata has to reflect that if 'annual_removal_factor' in out_pattern: @@ -193,7 +194,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', 
'-mo', 'extent=Global', '-mo', 'scale=negative values are removals', - '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh), + '-mo', f'treecover_density_threshold={thresh} (only model pixels with canopy cover > {thresh} are included in aggregation', '{0}.tif'.format(out_pattern)] uu.log_subprocess_output_full(cmd) @@ -202,13 +203,13 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux '-mo', 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees', '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', '-mo', 'extent=Global', - '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh), + '-mo', f'treecover_density_threshold={thresh} (only model pixels with canopy cover > {thresh} are included in aggregation', '{0}.tif'.format(out_pattern)] uu.log_subprocess_output_full(cmd) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: uu.print_log("Tiles processed. Uploading to s3 now...") uu.upload_final_set(output_dir_list[0], out_pattern) @@ -220,14 +221,14 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux for tile_name in tile_list: tile_id = uu.get_tile_id(tile_name) - os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern)) - os.remove('{0}_{1}_0_04deg.tif'.format(tile_id, pattern)) + os.remove(f'{tile_id}_{pattern}_rewindow.tif') + os.remove(f'{tile_id}_{pattern}_0_04deg.tif') # Need to delete rewindowed tiles so they aren't confused with the normal tiles for creation of supplementary outputs rewindow_list = glob.glob('*rewindow*tif') for rewindow_tile in rewindow_list: os.remove(rewindow_tile) - uu.print_log("Deleted all rewindowed tiles") + uu.print_log('Deleted all rewindowed tiles') # Compares the net flux from the standard model and the sensitivity analysis in two ways. @@ -237,43 +238,43 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000. # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the # code below should work. - if sensit_type not in ['std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss']: + if cn.SENSIT_TYPE not in ['std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss']: if std_net_flux: - uu.print_log("Standard aggregated flux results provided. Creating comparison maps.") + uu.print_log('Standard aggregated flux results provided. Creating comparison maps.') # Copies the standard model aggregation outputs to s3. Only net flux is used, though. 
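            # (For reference, a hedged sketch of the percent-difference command that
            # percent_diff() assembles; the flags are the ones shown in
            # aggregate_results_to_4_km.py, and the filenames here are placeholders:
            #   gdal_calc.py -A sensit_net_flux.tif -B std_net_flux.tif \
            #       --calc="(A-B)/absolute(B)*100" --outfile=perc_diff.tif \
            #       --NoDataValue=0 --overwrite --co COMPRESS=DEFLATE --quiet)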
- uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type) + uu.s3_file_download(std_net_flux, cn.docker_base_dir, cn.SENSIT_TYPE) # Identifies the standard model net flux map std_aggreg_flux = os.path.split(std_net_flux)[1] try: # Identifies the sensitivity model net flux map - sensit_aggreg_flux = glob.glob('net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0] + sensit_aggreg_flux = glob.glob('net_flux_Mt_CO2e_*{}*'.format(cn.SENSIT_TYPE))[0] - uu.print_log("Standard model net flux:", std_aggreg_flux) - uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux) + uu.print_log(f'Standard model net flux: {std_aggreg_flux}') + uu.print_log(f'Sensitivity model net flux: {sensit_aggreg_flux}') except: uu.print_log('Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.') - uu.print_log("Creating map of percent difference between standard and {} net flux".format(sensit_type)) - aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) + uu.print_log(f'Creating map of percent difference between standard and {cn.SENSIT_TYPE} net flux') + aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux) - uu.print_log("Creating map of which pixels change sign and which stay the same between standard and {}".format(sensit_type)) - aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) + uu.print_log(f'Creating map of which pixels change sign and which stay the same between standard and {cn.SENSIT_TYPE}') + aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff) uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change) else: - uu.print_log("No standard aggregated flux results provided. Not creating comparison maps.") + uu.print_log('No standard aggregated flux results provided. Not creating comparison maps.') if __name__ == '__main__': @@ -282,7 +283,7 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux parser = argparse.ArgumentParser( description='Create maps of model outputs at aggregated/coarser resolution') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--tcd-threshold', '-tcd', required=False, default=cn.canopy_threshold, @@ -292,24 +293,26 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.STD_NET_FLUX = args.std_net_flux_aggreg + cn.THRESH = args.tcd_threshold + thresh = int(cn.THRESH) + tile_id_list = args.tile_id_list - std_net_flux = args.std_net_flux_aggreg - thresh = args.tcd_threshold - thresh = int(thresh) - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux, - no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh, - std_net_flux=std_net_flux, no_upload=no_upload) \ No newline at end of file + mp_aggregate_results_to_4_km(tile_id_list, cn.THRESH, std_net_flux=cn.STD_NET_FLUX) \ No newline at end of file diff --git a/analyses/mp_create_supplementary_outputs.py b/analyses/mp_create_supplementary_outputs.py index e08892d2..a8aa2660 100644 --- a/analyses/mp_create_supplementary_outputs.py +++ b/analyses/mp_create_supplementary_outputs.py @@ -28,7 +28,7 @@ sys.path.append(os.path.join(cn.docker_app,'analyses')) import create_supplementary_outputs -def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_create_supplementary_outputs(tile_id_list): os.chdir(cn.docker_base_dir) @@ -37,10 +37,10 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list_outer == 'all': # List of tiles to run in the model - tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type) + tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list_outer) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_outer))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list_outer))} tiles to process', "\n") # Files to download for this script @@ -77,24 +77,24 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list # Pixel area tiles-- necessary for calculating per pixel values - uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list_outer) + uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list_outer) # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent - uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, 
tile_id_list_outer) - uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list_outer) - uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, sensit_type, tile_id_list_outer) + uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list_outer) + uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list_outer) + uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list_outer) - uu.print_log("Model outputs to process are:", download_dict) + uu.print_log(f'Model outputs to process are: {download_dict}') # If the model run isn't the standard one, the output directory is changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Iterates through input tile sets @@ -108,16 +108,16 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, # A new list is named so that tile_id_list stays as the command line argument. if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type) + tile_id_list_input = uu.tile_list_s3(input_dir, cn.SENSIT_TYPE) else: tile_id_list_input = tile_id_list_outer uu.print_log(tile_id_list_input) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_input))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list_input))} tiles to process', "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - uu.print_log("Downloading tiles from", input_dir) - uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, sensit_type, tile_id_list_input) + uu.print_log(f'Downloading tiles from {input_dir}') + uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list_input) # Blank list of output patterns, populated below output_patterns = [] @@ -132,10 +132,10 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, elif "net_flux" in input_pattern: output_patterns = output_pattern_list[6:9] else: - uu.exception_log(no_upload, "No output patterns found for input pattern. Please check.") + uu.exception_log('No output patterns found for input pattern. 
Please check.') - uu.print_log("Input pattern:", input_pattern) - uu.print_log("Output patterns:", output_patterns) + uu.print_log(f'Input pattern: {input_pattern}') + uu.print_log(f'Output patterns: {output_patterns}') # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak @@ -143,16 +143,17 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, processes = 18 else: processes = 2 - uu.print_log("Creating derivative outputs for {0} with {1} processors...".format(input_pattern, processes)) + uu.print_log(f'Creating derivative outputs for {input_pattern} with {processes} processors...') pool = multiprocessing.Pool(processes) pool.map(partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern, - output_patterns=output_patterns, sensit_type=sensit_type, no_upload=no_upload), tile_id_list_input) + output_patterns=output_patterns), + tile_id_list_input) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list_input: - # create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload) + # create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns) # Checks the two forest extent output tiles created from each input tile for whether there is data in them. # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent @@ -160,22 +161,22 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, for output_pattern in output_patterns[1:3]: if cn.count <= 2: # For local tests processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') pool = multiprocessing.Pool(processes) pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list_input) pool.close() pool.join() else: processes = 55 # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') pool = multiprocessing.Pool(processes) pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list_input) pool.close() pool.join() - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) @@ -187,7 +188,7 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, parser = argparse.ArgumentParser( description='Create tiles of model outputs at forest extent and per-pixel values') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -195,21 +196,23 @@ def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list, - run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_create_supplementary_outputs(tile_id_list=tile_id_list) \ No newline at end of file diff --git a/analyses/mp_net_flux.py b/analyses/mp_net_flux.py index 9501a7d5..09be0515 100644 --- a/analyses/mp_net_flux.py +++ b/analyses/mp_net_flux.py @@ -1,19 +1,26 @@ -### Calculates the net emissions over the study period, with units of Mg CO2e/ha on a pixel-by-pixel basis. -### This only uses gross emissions from biomass+soil (doesn't run with gross emissions from soil_only). +""" +Calculates the net GHG flux over the study period, with units of Mg CO2e/ha on a pixel-by-pixel basis. +This only uses gross emissions from biomass+soil (doesn't run with gross emissions from soil_only). +""" -import multiprocessing import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu sys.path.append(os.path.join(cn.docker_app,'analyses')) import net_flux -def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_net_flux(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 1 set of tiles with net GHG flux (gross emissions minus gross removals). 
+ Units: Mg CO2e/ha over the model period + """ os.chdir(cn.docker_base_dir) @@ -22,10 +29,10 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): # List of tiles to run in the model tile_id_list = uu.create_combined_tile_list(cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir, - sensit_type=sensit_type) + sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script @@ -42,46 +49,47 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 32 # 32 processors = XXX GB peak else: processes = 40 # 38 = 690 GB peak; 40 = 715 GB peak else: processes = 9 - uu.print_log('Net flux max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Net flux max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(net_flux.net_calc, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type, no_upload) + # net_flux.net_calc(tile_id, output_pattern_list[0]) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -91,7 +99,7 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): parser = argparse.ArgumentParser( description='Creates tiles of net GHG flux over model period') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -99,20 +107,23 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_net_flux(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_net_flux(tile_id_list) diff --git a/analyses/mp_tile_statistics.py b/analyses/mp_tile_statistics.py index 82ac336e..7b28c1cd 100644 --- a/analyses/mp_tile_statistics.py +++ b/analyses/mp_tile_statistics.py @@ -197,7 +197,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): parser = argparse.ArgumentParser( description='Create tiles of the annual AGB and BGB removals rates for mangrove forests') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') args = parser.parse_args() @@ -205,7 +205,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): tile_id_list = args.tile_id_list # Create the output log - uu.initiate_log(sensit_type=sensit_type, tile_id_list=tile_id_list) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) diff --git a/analyses/net_flux.py b/analyses/net_flux.py index 409c1f74..5d8e8cef 100644 --- a/analyses/net_flux.py +++ b/analyses/net_flux.py @@ -1,15 +1,24 @@ -### Calculates the net emissions over the study period, with units of Mg CO2/ha on a pixel-by-pixel basis +""" +Function to create net flux tiles +""" -import os import datetime import numpy as np import rasterio import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu -def net_calc(tile_id, pattern, sensit_type, no_upload): +def net_calc(tile_id, pattern): + """ + Creates net GHG flux tile set + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: 1 tile with net GHG flux (gross emissions minus gross removals). 
+ Units: Mg CO2e/ha over the model period + """ uu.print_log("Calculating net flux for", tile_id) @@ -17,11 +26,11 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): start = datetime.datetime.now() # Names of the removals and emissions tiles - removals_in = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types) - emissions_in = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil) + removals_in = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types) + emissions_in = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil) # Output net emissions file - net_flux = '{0}_{1}.tif'.format(tile_id, pattern) + net_flux = f'{tile_id}_{pattern}.tif' try: removals_src = rasterio.open(removals_in) @@ -29,9 +38,9 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): kwargs = removals_src.meta # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory windows = removals_src.block_windows(1) - uu.print_log(" Gross removals tile found for {}".format(removals_in)) - except: - uu.print_log(" No gross removals tile found for {}".format(removals_in)) + uu.print_log(f' Gross removals tile found for {removals_in}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No gross removals tile found for {removals_in}') try: emissions_src = rasterio.open(emissions_in) @@ -39,9 +48,9 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): kwargs = emissions_src.meta # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory windows = emissions_src.block_windows(1) - uu.print_log(" Gross emissions tile found for {}".format(emissions_in)) - except: - uu.print_log(" No gross emissions tile found for {}".format(emissions_in)) + uu.print_log(f' Gross emissions tile found for {emissions_in}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No gross emissions tile found for {emissions_in}') # Skips the tile if there is neither a gross emissions nor a gross removals tile. # This should only occur for biomass_swap sensitivity analysis, which gets its net flux tile list from @@ -55,17 +64,17 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): nodata=0, dtype='float32' ) - except: - uu.print_log("No gross emissions or gross removals for {}. Skipping tile.".format(tile_id)) + except rasterio.errors.RasterioIOError: + uu.print_log(f'No gross emissions or gross removals for {tile_id}. 
Skipping tile.')
+        return

     # Opens the output tile, giving it the arguments of the input tiles
     net_flux_dst = rasterio.open(net_flux, 'w', **kwargs)

     # Adds metadata tags to the output raster
-    uu.add_rasterio_tags(net_flux_dst, sensit_type)
+    uu.add_universal_metadata_rasterio(net_flux_dst)
     net_flux_dst.update_tags(
-        units='Mg CO2e/ha over model duration (2001-20{})'.format(cn.loss_years))
+        units=f'Mg CO2e/ha over model duration (2001-20{cn.loss_years})')
     net_flux_dst.update_tags(
         source='Gross emissions - gross removals')
     net_flux_dst.update_tags(
@@ -81,11 +90,11 @@ def net_calc(tile_id, pattern, sensit_type, no_upload):
         # Creates windows for each input tile
         try:
             removals_window = removals_src.read(1, window=window).astype('float32')
-        except:
+        except UnboundLocalError:
             removals_window = np.zeros((window.height, window.width)).astype('float32')
         try:
             emissions_window = emissions_src.read(1, window=window).astype('float32')
-        except:
+        except UnboundLocalError:
             emissions_window = np.zeros((window.height, window.width)).astype('float32')

         # Subtracts removals from emissions to calculate net flux (negative is net sink, positive is net source)
@@ -94,4 +103,4 @@ def net_calc(tile_id, pattern, sensit_type, no_upload):
         net_flux_dst.write_band(1, dst_data, window=window)

     # Prints information about the tile that was just processed
-    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
\ No newline at end of file
+    uu.end_of_fx_summary(start, tile_id, pattern)
diff --git a/burn_date/hansen_burnyear_final.py b/burn_date/hansen_burnyear_final.py
index 77383987..384b797e 100644
--- a/burn_date/hansen_burnyear_final.py
+++ b/burn_date/hansen_burnyear_final.py
@@ -141,7 +141,7 @@ def hansen_burnyear(tile_id, no_upload):
         out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs)

         # Adds metadata tags to the output raster
-        uu.add_rasterio_tags(out_tile_tagged, 'std')
+        uu.add_universal_metadata_rasterio(out_tile_tagged)
         out_tile_tagged.update_tags(
             units='year (2001, 2002, 2003...)')
         out_tile_tagged.update_tags(
diff --git a/burn_date/mp_burn_year.py b/burn_date/mp_burn_year.py
index 6149bceb..96a9a552 100644
--- a/burn_date/mp_burn_year.py
+++ b/burn_date/mp_burn_year.py
@@ -51,7 +51,7 @@ def mp_burn_year(tile_id_list, run_date = None, no_upload = None):
         tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

     uu.print_log(tile_id_list)
-    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")
+    uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n")

     # List of output directories and output file name patterns
     output_dir_list = [cn.burn_year_dir]
@@ -263,10 +263,10 @@ def mp_burn_year(tile_id_list, run_date = None, no_upload = None):
     args = parser.parse_args()
     tile_id_list = args.tile_id_list
     run_date = args.run_date
     no_upload = args.no_upload

     # Create the output log
-    uu.initiate_log(tile_id_list=tile_id_list, sensit_type='std', run_date=run_date, no_upload=no_upload)
+    uu.initiate_log(tile_id_list)

     # Checks whether the tile_id_list argument is valid
     tile_id_list = uu.tile_id_list_check(tile_id_list)
diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py
index bd2435c9..eaee24b6 100644
--- a/carbon_pools/create_carbon_pools.py
+++ b/carbon_pools/create_carbon_pools.py
@@ -1,16 +1,26 @@
+"""Functions to create carbon pools (Mg C/ha)"""
+
 import datetime
-import sys
-import pandas as pd
 import os
-import numpy as np
 import rasterio
+import sys
+import numpy as np
+import pandas as pd
+
sys.path.append('../')
 import constants_and_names as cn
 import universal_util as uu

-# Creates a dictionary of biomass in belowground, deadwood, and litter emitted_pools to aboveground biomass pool
 def mangrove_pool_ratio_dict(gain_table_simplified, tropical_dry, tropical_wet, subtropical):
+    """
+    Creates a dictionary of ratios of belowground biomass, deadwood, and litter carbon pools to the aboveground biomass pool for mangroves
+    :param gain_table_simplified: Table of removal factors for mangroves
+    :param tropical_dry: Belowground:aboveground biomass ratio for tropical dry mangroves
+    :param tropical_wet: Belowground:aboveground biomass ratio for tropical wet mangroves
+    :param subtropical: Belowground:aboveground biomass ratio for subtropical mangroves
+    :return: dictionary mapping each of the three mangrove types to its x_pool:AGB ratio
+    """

     # Creates x_pool:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
     # the "mangType" field in the removals rate spreadsheet.
@@ -38,39 +48,44 @@ def mangrove_pool_ratio_dict(gain_table_simplified, tropical_dry, tropical_wet,
     return mang_x_pool_AGB_ratio


-# Creates aboveground carbon emitted_pools in 2000 and/or the year of loss (loss pixels only)
-def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload):
+def create_AGC(tile_id, carbon_pool_extent):
+    """
+    Creates aboveground carbon density tiles in 2000 and/or the year of loss (loss pixels only)
+    :param tile_id: tile to be processed, identified by its tile id
+    :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000
+    :return: Aboveground carbon density in the specified pixels for the specified years (Mg C/ha)
+    """

     # Start time
     start = datetime.datetime.now()

     # Names of the input tiles. Creates the names even if the files don't exist.
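(uu.sensit_tile_rename is defined in universal_util rather than in this patch; the sketch below is a hypothetical approximation of its naming behavior. The fallback rule and the pattern string are assumptions for illustration, not the repo's code.)

    import os

    def sensit_tile_rename_sketch(sensit_type, tile_id, pattern):
        # Hypothetical: prefer a sensitivity-specific tile if one exists locally;
        # otherwise fall back to the standard tile name.
        candidate = f'{tile_id}_{pattern}_{sensit_type}.tif'
        if sensit_type != 'std' and os.path.exists(candidate):
            return candidate
        return f'{tile_id}_{pattern}.tif'

    # e.g. sensit_tile_rename_sketch('std', '00N_000E', 'Mg_AGC_ha_emis_year')
    # -> '00N_000E_Mg_AGC_ha_emis_year.tif'
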
- removal_forest_type = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_removal_forest_type) - mangrove_biomass_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - gain = uu.sensit_tile_rename(sensit_type, cn.pattern_gain, tile_id) - annual_gain_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_all_types) - cumul_gain_AGCO2 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cumul_gain_AGCO2_all_types) + removal_forest_type = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_removal_forest_type) + mangrove_biomass_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + gain = uu.sensit_tile_rename(cn.SENSIT_TYPE, cn.pattern_gain, tile_id) + annual_gain_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_all_types) + cumul_gain_AGCO2 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cumul_gain_AGCO2_all_types) # Biomass tile name depends on the sensitivity analysis - if sensit_type == 'biomass_swap': - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) + if cn.SENSIT_TYPE == 'biomass_swap': + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_JPL_unmasked_processed}.tif' + uu.print_log(f'Using JPL biomass tile for {cn.SENSIT_TYPE} sensitivity analysis') else: - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_WHRC_biomass_2000_unmasked}.tif' + uu.print_log(f'Using WHRC biomass tile for {cn.SENSIT_TYPE} sensitivity analysis') - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}...') # Loss tile name depends on the sensitivity analysis - if sensit_type == 'legal_Amazon_loss': - uu.print_log(" Brazil-specific loss tile found for {}".format(tile_id)) - loss_year = '{}_{}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - elif os.path.exists('{}_{}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed)): - uu.print_log(" Mekong-specific loss tile found for {}".format(tile_id)) - loss_year = '{}_{}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + uu.print_log(f' Brazil-specific loss tile found for {tile_id}') + loss_year = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' + elif os.path.exists(f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif'): + uu.print_log(f' Mekong-specific loss tile found for {tile_id}') + loss_year = f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif' else: - uu.print_log(" Hansen loss tile found for {}".format(tile_id)) - loss_year = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) + uu.print_log(f' Hansen loss tile found for {tile_id}') + loss_year = f'{cn.pattern_loss}_{tile_id}.tif' # This input is required to exist loss_year_src = rasterio.open(loss_year) @@ -78,39 +93,39 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): # Opens the input tiles if they exist try: annual_gain_AGC_src = rasterio.open(annual_gain_AGC) - uu.print_log(" Aboveground removal factor tile found for", tile_id) - except: - uu.print_log(" No aboveground removal factor tile for", tile_id) + uu.print_log(f' Aboveground removal factor tile found for {tile_id}') + except 
rasterio.errors.RasterioIOError: + uu.print_log(f' No aboveground removal factor tile for {tile_id}') try: cumul_gain_AGCO2_src = rasterio.open(cumul_gain_AGCO2) - uu.print_log(" Gross aboveground removal tile found for", tile_id) - except: - uu.print_log(" No gross aboveground removal tile for", tile_id) + uu.print_log(f' Gross aboveground removal tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No gross aboveground removal tile for {tile_id}') try: mangrove_biomass_2000_src = rasterio.open(mangrove_biomass_2000) - uu.print_log(" Mangrove tile found for", tile_id) - except: - uu.print_log(" No mangrove tile for", tile_id) + uu.print_log(f' Mangrove tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No mangrove tile for {tile_id}') try: natrl_forest_biomass_2000_src = rasterio.open(natrl_forest_biomass_2000) - uu.print_log(" Biomass found for", tile_id) - except: - uu.print_log(" No biomass found for", tile_id) + uu.print_log(f' Biomass found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No biomass found for {tile_id}') try: gain_src = rasterio.open(gain) - uu.print_log(" Gain tile found for", tile_id) - except: - uu.print_log(" No gain tile found for", tile_id) + uu.print_log(f' Gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No gain tile found for {tile_id}') try: removal_forest_type_src = rasterio.open(removal_forest_type) - uu.print_log(" Removal type tile found for", tile_id) - except: - uu.print_log(" No removal type tile found for", tile_id) + uu.print_log(f' Removal type tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No removal type tile found for {tile_id}') # Grabs the windows of a tile to iterate over the entire tif without running out of memory @@ -132,12 +147,12 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): # The output files: aboveground carbon density in 2000 and in the year of loss. Creates names and rasters to write to. if '2000' in carbon_pool_extent: output_pattern_list = [cn.pattern_AGC_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - AGC_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + AGC_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_AGC_2000 = rasterio.open(AGC_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_AGC_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_AGC_2000) dst_AGC_2000.update_tags( units='megagrams aboveground carbon (AGC)/ha') dst_AGC_2000.update_tags( @@ -146,12 +161,12 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): extent='aboveground biomass in 2000 (WHRC if standard model, JPL if biomass_swap sensitivity analysis) and mangrove AGB. 
Mangrove AGB has precedence.')

     if 'loss' in carbon_pool_extent:
         output_pattern_list = [cn.pattern_AGC_emis_year]
-        if sensit_type != 'std':
-            output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
-        AGC_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0])
+        if cn.SENSIT_TYPE != 'std':
+            output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list)
+        AGC_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif'
         dst_AGC_emis_year = rasterio.open(AGC_emis_year, 'w', **kwargs)
         # Adds metadata tags to the output raster
-        uu.add_rasterio_tags(dst_AGC_emis_year, sensit_type)
+        uu.add_universal_metadata_rasterio(dst_AGC_emis_year)
         dst_AGC_emis_year.update_tags(
             units='megagrams aboveground carbon (AGC)/ha')
         dst_AGC_emis_year.update_tags(
@@ -160,7 +175,7 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload):
             extent='tree cover loss pixels within model extent')


-    uu.print_log("  Creating aboveground carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent))
+    uu.print_log(f'  Creating aboveground carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}')

     uu.check_memory()

@@ -171,27 +186,27 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload):
         loss_year_window = loss_year_src.read(1, window=window)
         try:
             annual_gain_AGC_window = annual_gain_AGC_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             annual_gain_AGC_window = np.zeros((window.height, window.width), dtype='float32')
         try:
             cumul_gain_AGCO2_window = cumul_gain_AGCO2_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             cumul_gain_AGCO2_window = np.zeros((window.height, window.width), dtype='float32')
         try:
             removal_forest_type_window = removal_forest_type_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             removal_forest_type_window = np.zeros((window.height, window.width), dtype='uint8')
         try:
             gain_window = gain_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             gain_window = np.zeros((window.height, window.width), dtype='uint8')
         try:
             mangrove_biomass_2000_window = mangrove_biomass_2000_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             mangrove_biomass_2000_window = np.zeros((window.height, window.width), dtype='uint8')
         try:
             natrl_forest_biomass_2000_window = natrl_forest_biomass_2000_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             natrl_forest_biomass_2000_window = np.zeros((window.height, window.width), dtype='uint8')

@@ -254,34 +269,40 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload):

     # Prints information about the tile that was just processed
     if 'loss' in carbon_pool_extent:
-        uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_emis_year, no_upload)
+        uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_emis_year)
     else:
-        uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_2000, no_upload)
+        uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_2000)


-# Creates belowground carbon tiles (both in 2000 and loss year)
-def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload):
+def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent):
+    """
+    Creates belowground carbon tiles (both in 2000 and loss year)
+    :param tile_id: tile to be processed, identified by its tile id
+    :param mang_BGB_AGB_ratio: BGB:AGB ratio for mangroves
+    :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000
+    :return: Belowground carbon density 
in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() # Names of the input tiles - removal_forest_type = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_removal_forest_type) - cont_ecozone = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) + removal_forest_type = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_removal_forest_type) + cont_ecozone = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) # For BGC 2000, opens AGC, names the output tile, creates the output tile if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - BGC_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + BGC_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_BGC_2000 = rasterio.open(BGC_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_BGC_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_BGC_2000) dst_BGC_2000.update_tags( units='megagrams belowground carbon (BGC)/ha') dst_BGC_2000.update_tags( @@ -291,18 +312,18 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ # For BGC in emissions year, opens AGC, names the output tile, creates the output tile if 'loss' in carbon_pool_extent: - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) AGC_emis_year_src = rasterio.open(AGC_emis_year) kwargs = AGC_emis_year_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_emis_year] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - BGC_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + BGC_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif' dst_BGC_emis_year = rasterio.open(BGC_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_BGC_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_BGC_emis_year) dst_BGC_emis_year.update_tags( units='megagrams belowground carbon (BGC)/ha') dst_BGC_emis_year.update_tags( @@ -311,22 +332,22 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ extent='tree cover loss pixels within model extent') - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}') # Opens inputs that are used regardless of whether calculating BGC2000 or BGC in emissions year try: cont_ecozone_src = rasterio.open(cont_ecozone) - uu.print_log(" Continent-ecozone tile found for", tile_id) - except: - uu.print_log(" No Continent-ecozone tile found for", tile_id) + uu.print_log(f' Continent-ecozone tile found for 
{tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No Continent-ecozone tile found for {tile_id}')

     try:
         removal_forest_type_src = rasterio.open(removal_forest_type)
-        uu.print_log("  Removal forest type tile found for", tile_id)
-    except:
-        uu.print_log("  No Removal forest type tile found for", tile_id)
+        uu.print_log(f'  Removal forest type tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No Removal forest type tile found for {tile_id}')


-    uu.print_log("  Creating belowground carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent))
+    uu.print_log(f'  Creating belowground carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}')

     uu.check_memory()

@@ -336,12 +357,12 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_
         # Creates windows from inputs that are used regardless of whether calculating BGC2000 or BGC in emissions year
         try:
             cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32')
-        except:
+        except UnboundLocalError:
             cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32')

         try:
             removal_forest_type_window = removal_forest_type_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             removal_forest_type_window = np.zeros((window.height, window.width))

         # Applies the mangrove BGB:AGB ratios (3 different ratios) to the ecozone raster to create a raster of BGB:AGB ratios
@@ -374,45 +395,52 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_

     # Prints information about the tile that was just processed
     if 'loss' in carbon_pool_extent:
-        uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_emis_year, no_upload)
+        uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_emis_year)
     else:
-        uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_2000, no_upload)
+        uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_2000)


-# Creates deadwood and litter carbon tiles (in 2000 and/or in loss year)
-def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload):
+def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent):
+    """
+    Creates deadwood and litter carbon tiles (in 2000 and/or in loss year)
+    :param tile_id: tile to be processed, identified by its tile id
+    :param mang_deadwood_AGB_ratio: ratio of deadwood carbon to aboveground carbon for mangroves
+    :param mang_litter_AGB_ratio: ratio of litter carbon to aboveground carbon for mangroves
+    :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000
+    :return: Deadwood and litter carbon density tiles in the specified pixels for the specified years (Mg C/ha)
+    """

     start = datetime.datetime.now()

     # Names of the input tiles. Creates the names even if the files don't exist.
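(The mang_deadwood_AGB_ratio and mang_litter_AGB_ratio arguments documented above are dictionaries keyed by mangrove type; a self-contained sketch of how such a dictionary can be applied to a continent-ecozone window. The type codes and ratio values here are made up, not the IPCC Wetland Supplement numbers the model actually loads.)

    import numpy as np

    mang_deadwood_AGB_ratio = {1: 0.123, 2: 0.258, 3: 0.258}  # hypothetical codes/values

    def ratio_window(cont_ecozone_window, ratio_dict):
        # Non-mangrove pixels (codes missing from the dictionary) get a ratio of 0
        lookup = np.vectorize(lambda code: ratio_dict.get(int(code), 0.0))
        return lookup(cont_ecozone_window).astype('float32')

    window = np.array([[1, 0], [3, 2]])
    print(ratio_window(window, mang_deadwood_AGB_ratio))
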
- mangrove_biomass_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - bor_tem_trop = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_bor_tem_trop_processed) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) - precip = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_precip) - elevation = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_elevation) - if sensit_type == 'biomass_swap': - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) + mangrove_biomass_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + bor_tem_trop = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_bor_tem_trop_processed) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) + precip = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_precip) + elevation = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_elevation) + if cn.SENSIT_TYPE == 'biomass_swap': + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_JPL_unmasked_processed}.tif' + uu.print_log(f'Using JPL biomass tile for {cn.SENSIT_TYPE} sensitivity analysis') else: - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_WHRC_biomass_2000_unmasked}.tif' + uu.print_log(f'Using WHRC biomass tile for {cn.SENSIT_TYPE} sensitivity analysis') # For deadwood and litter 2000, opens AGC, names the output tiles, creates the output tiles if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_deadwood_2000, cn.pattern_litter_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - deadwood_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - litter_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + deadwood_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' + litter_2000 = f'{tile_id}_{output_pattern_list[1]}.tif' dst_deadwood_2000 = rasterio.open(deadwood_2000, 'w', **kwargs) dst_litter_2000 = rasterio.open(litter_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_deadwood_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_deadwood_2000) dst_deadwood_2000.update_tags( units='megagrams deadwood carbon/ha') dst_deadwood_2000.update_tags( @@ -420,7 +448,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat dst_deadwood_2000.update_tags( extent='aboveground biomass in 2000 (WHRC if standard model, JPL if biomass_swap sensitivity analysis) and mangrove AGB. 
Mangrove AGB has precedence.')
         # Adds metadata tags to the output raster
-        uu.add_rasterio_tags(dst_litter_2000, sensit_type)
+        uu.add_universal_metadata_rasterio(dst_litter_2000)
         dst_litter_2000.update_tags(
             units='megagrams litter carbon/ha')
         dst_litter_2000.update_tags(
@@ -430,21 +458,21 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat

     # For deadwood and litter in emissions year, opens AGC, names the output tiles, creates the output tiles
     if 'loss' in carbon_pool_extent:
-        AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year)
+        AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year)
         AGC_emis_year_src = rasterio.open(AGC_emis_year)
         kwargs = AGC_emis_year_src.meta
         kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0)
         windows = AGC_emis_year_src.block_windows(1)
         output_pattern_list = [cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000]
-        if sensit_type != 'std':
-            output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
-        deadwood_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0])
-        litter_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1])
+        if cn.SENSIT_TYPE != 'std':
+            output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list)
+        deadwood_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif'
+        litter_emis_year = f'{tile_id}_{output_pattern_list[1]}.tif'
         dst_deadwood_emis_year = rasterio.open(deadwood_emis_year, 'w', **kwargs)
         dst_litter_emis_year = rasterio.open(litter_emis_year, 'w', **kwargs)
         # Adds metadata tags to the output raster
-        uu.add_rasterio_tags(dst_deadwood_emis_year, sensit_type)
+        uu.add_universal_metadata_rasterio(dst_deadwood_emis_year)
         dst_deadwood_emis_year.update_tags(
             units='megagrams deadwood carbon/ha')
         dst_deadwood_emis_year.update_tags(
@@ -452,7 +480,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat
         dst_deadwood_emis_year.update_tags(
             extent='tree cover loss pixels within model extent')
         # Adds metadata tags to the output raster
-        uu.add_rasterio_tags(dst_litter_emis_year, sensit_type)
+        uu.add_universal_metadata_rasterio(dst_litter_emis_year)
         dst_litter_emis_year.update_tags(
             units='megagrams litter carbon/ha')
         dst_litter_emis_year.update_tags(
@@ -460,49 +488,49 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat
         dst_litter_emis_year.update_tags(
             extent='tree cover loss pixels within model extent')

-    uu.print_log("  Reading input files for {}...".format(tile_id))
+    uu.print_log(f'  Reading input files for {tile_id}')

     try:
         precip_src = rasterio.open(precip)
-        uu.print_log("  Precipitation tile found for", tile_id)
-    except:
-        uu.print_log("  No precipitation tile biomass for", tile_id)
+        uu.print_log(f'  Precipitation tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No precipitation tile found for {tile_id}')

     try:
         elevation_src = rasterio.open(elevation)
-        uu.print_log("  Elevation tile found for", tile_id)
-    except:
-        uu.print_log("  No elevation tile biomass for", tile_id)
+        uu.print_log(f'  Elevation tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No elevation tile found for {tile_id}')

     # Opens the boreal/temperate/tropical tile if it exists
     try:
         bor_tem_trop_src = rasterio.open(bor_tem_trop)
-        uu.print_log("  Boreal/temperate/tropical tile found for", tile_id)
-    except:
-        uu.print_log("  No boreal/temperate/tropical tile biomass for", tile_id)
+        uu.print_log(f'  Boreal/temperate/tropical tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No boreal/temperate/tropical tile found for {tile_id}')

     # Opens the mangrove biomass tile if it exists
     try:
         mangrove_biomass_2000_src = rasterio.open(mangrove_biomass_2000)
-        uu.print_log("  Mangrove biomass found for", tile_id)
-    except:
-        uu.print_log("  No mangrove biomass for", tile_id)
+        uu.print_log(f'  Mangrove biomass found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No mangrove biomass for {tile_id}')

     # Opens the WHRC/JPL biomass tile if it exists
     try:
         natrl_forest_biomass_2000_src = rasterio.open(natrl_forest_biomass_2000)
-        uu.print_log("  Biomass found for", tile_id)
-    except:
-        uu.print_log("  No biomass for", tile_id)
+        uu.print_log(f'  Biomass found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No biomass for {tile_id}')

     # Opens the continent-ecozone tile if it exists
     try:
         cont_ecozone_src = rasterio.open(cont_eco)
-        uu.print_log("  Continent-ecozone tile found for", tile_id)
-    except:
-        uu.print_log("  No Continent-ecozone tile found for", tile_id)
+        uu.print_log(f'  Continent-ecozone tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No Continent-ecozone tile found for {tile_id}')

-    uu.print_log("  Creating deadwood and litter carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent))
+    uu.print_log(f'  Creating deadwood and litter carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}')

     uu.check_memory()

@@ -521,27 +549,27 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat
         # # clipping to AGC2000; I'm doing that just as a formality. It feels more complete. 
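(The try/except UnboundLocalError blocks below work because a source variable is simply never bound when its tile failed to open; an equivalent, more explicit alternative is to track missing sources as None, as in this sketch.)

    import numpy as np

    def read_window_or_zeros(src, window, dtype='float32'):
        # src is None when the input tile was never opened; a zero-filled array
        # of the window's shape keeps the downstream arithmetic valid.
        if src is None:
            return np.zeros((window.height, window.width), dtype=dtype)
        return src.read(1, window=window).astype(dtype)
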
# try: # AGC_2000_window = AGC_2000_src.read(1, window=window) - # except: + # except UnboundLocalError: # AGC_2000_window = np.zeros((window.height, window.width), dtype='float32') try: AGC_emis_year_window = AGC_emis_year_src.read(1, window=window) - except: + except UnboundLocalError: AGC_emis_year_window = np.zeros((window.height, window.width), dtype='float32') try: cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32') try: bor_tem_trop_window = bor_tem_trop_src.read(1, window=window) - except: + except UnboundLocalError: bor_tem_trop_window = np.zeros((window.height, window.width)) try: precip_window = precip_src.read(1, window=window) - except: + except UnboundLocalError: precip_window = np.zeros((window.height, window.width)) try: elevation_window = elevation_src.read(1, window=window) - except: + except UnboundLocalError: elevation_window = np.zeros((window.height, window.width)) # This allows the script to bypass the few tiles that have mangrove biomass but not WHRC biomass @@ -641,7 +669,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Same as above but for litter try: cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32') # Applies the mangrove deadwood:AGB ratios (2 different ratios) to the ecozone raster to create a raster of deadwood:AGB ratios @@ -681,29 +709,34 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_emis_year_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_emis_year_2000) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000) -# Creates soil carbon tiles in loss pixels only -def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): +def create_soil_emis_extent(tile_id, pattern): + """ + Creates soil carbon tiles in loss pixels only + :param tile_id: tile to be processed, identified by its tile id + :param pattern: tile pattern to be processed + :return: Soil organic carbon density tile in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() # Names of the input tiles. Creates the names even if the files don't exist. - soil_full_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_full_extent_2000) - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) + soil_full_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_full_extent_2000) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) if os.path.exists(soil_full_extent) & os.path.exists(AGC_emis_year): - uu.print_log("Soil C 2000 and loss found for {}. Proceeding with soil C in loss extent.".format(tile_id)) + uu.print_log(f'Soil C 2000 and loss found for {tile_id}. Proceeding with soil C in loss extent.') else: - return uu.print_log("Soil C 2000 and/or loss not found for {}. Skipping soil C in loss extent.".format(tile_id)) + return uu.print_log(f'Soil C 2000 and/or loss not found for {tile_id}. 
Skipping soil C in loss extent.')

     # Name of output tile
-    soil_emis_year = '{0}_{1}.tif'.format(tile_id, pattern)
+    soil_emis_year = f'{tile_id}_{pattern}.tif'

-    uu.print_log("  Reading input files for {}...".format(tile_id))
+    uu.print_log(f'  Reading input files for {tile_id}...')

     # Both of these tiles should exist and thus be able to be opened
     soil_full_extent_src = rasterio.open(soil_full_extent)
@@ -728,7 +761,7 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload):
     dst_soil_emis_year = rasterio.open(soil_emis_year, 'w', **kwargs)

     # Adds metadata tags to the output raster
-    uu.add_rasterio_tags(dst_soil_emis_year, sensit_type)
+    uu.add_universal_metadata_rasterio(dst_soil_emis_year)
     dst_soil_emis_year.update_tags(
         units='megagrams soil carbon/ha')
     dst_soil_emis_year.update_tags(
@@ -736,7 +769,7 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload):
     dst_soil_emis_year.update_tags(
         extent='tree cover loss pixels')

-    uu.print_log("  Creating soil carbon density for loss pixels in {}...".format(tile_id))
+    uu.print_log(f'  Creating soil carbon density for loss pixels in {tile_id}...')

     uu.check_memory()

@@ -758,11 +791,16 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload):
         dst_soil_emis_year.write_band(1, soil_output, window=window)

     # Prints information about the tile that was just processed
-    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
+    uu.end_of_fx_summary(start, tile_id, pattern)


-# Creates total carbon tiles (both in 2000 and loss year)
-def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload):
+def create_total_C(tile_id, carbon_pool_extent):
+    """
+    Creates total carbon tiles (both in 2000 and loss year)
+    :param tile_id: tile to be processed, identified by its tile id
+    :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000
+    :return: Total carbon density tile in the specified pixels for the specified years (Mg C/ha)
+    """

     start = datetime.datetime.now()

@@ -772,31 +810,31 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload):
     # If litter in 2000 is being created, it uses the 2000 AGC tile.
     # The other input tiles aren't affected by whether the output is for 2000 or for the loss year. 
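(The total-carbon windows computed below are a straight per-pixel sum of the five pool densities; schematically, with made-up values in Mg C/ha:)

    import numpy as np

    AGC, BGC, deadwood, litter, soil = (
        np.full((2, 2), v, dtype='float32') for v in (60.0, 16.0, 5.2, 2.9, 70.0))
    total_C = AGC + BGC + deadwood + litter + soil
    print(total_C[0, 0])  # ~154.1
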
if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) - BGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_BGC_2000) - deadwood_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_deadwood_2000) - litter_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_litter_2000) - soil_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_full_extent_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) + BGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGC_2000) + deadwood_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_deadwood_2000) + litter_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_litter_2000) + soil_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_full_extent_2000) AGC_2000_src = rasterio.open(AGC_2000) BGC_2000_src = rasterio.open(BGC_2000) deadwood_2000_src = rasterio.open(deadwood_2000) litter_2000_src = rasterio.open(litter_2000) try: soil_2000_src = rasterio.open(soil_2000) - uu.print_log(" Soil C 2000 tile found for", tile_id) - except: - uu.print_log(" No soil C 2000 tile found for", tile_id) + uu.print_log(f' Soil C 2000 tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No soil C 2000 tile found for {tile_id}') kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - total_C_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + total_C_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_total_C_2000 = rasterio.open(total_C_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_total_C_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_total_C_2000) dst_total_C_2000.update_tags( units='megagrams total (all emitted_pools) carbon/ha') dst_total_C_2000.update_tags( @@ -806,31 +844,31 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): if 'loss' in carbon_pool_extent: - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) - BGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_BGC_emis_year) - deadwood_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_deadwood_emis_year_2000) - litter_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_litter_emis_year_2000) - soil_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_emis_year_2000) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) + BGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGC_emis_year) + deadwood_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_deadwood_emis_year_2000) + litter_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_litter_emis_year_2000) + soil_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_emis_year_2000) AGC_emis_year_src = rasterio.open(AGC_emis_year) BGC_emis_year_src = rasterio.open(BGC_emis_year) deadwood_emis_year_src = rasterio.open(deadwood_emis_year) litter_emis_year_src = rasterio.open(litter_emis_year) 
try: soil_emis_year_src = rasterio.open(soil_emis_year) - uu.print_log(" Soil C emission year tile found for", tile_id) - except: - uu.print_log(" No soil C emission year tile found for", tile_id) + uu.print_log(f' Soil C emission year tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No soil C emission year tile found for {tile_id}') kwargs = AGC_emis_year_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_emis_year] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - total_C_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + total_C_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif' dst_total_C_emis_year = rasterio.open(total_C_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_total_C_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_total_C_emis_year) dst_total_C_emis_year.update_tags( units='megagrams total (all emitted_pools) carbon/ha') dst_total_C_emis_year.update_tags( @@ -839,7 +877,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): extent='tree cover loss pixels within model extent') - uu.print_log(" Creating total carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent)) + uu.print_log(f' Creating total carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}...') uu.check_memory() @@ -855,7 +893,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): litter_2000_window = litter_2000_src.read(1, window=window) try: soil_2000_window = soil_2000_src.read(1, window=window) - except: + except UnboundLocalError: soil_2000_window = np.zeros((window.height, window.width)) total_C_2000_window = AGC_2000_window + BGC_2000_window + deadwood_2000_window + litter_2000_window + soil_2000_window @@ -876,7 +914,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): litter_emis_year_window = litter_emis_year_src.read(1, window=window) try: soil_emis_year_window = soil_emis_year_src.read(1, window=window) - except: + except UnboundLocalError: soil_emis_year_window = np.zeros((window.height, window.width)) total_C_emis_year_window = AGC_emis_year_window + BGC_emis_year_window + deadwood_emis_year_window + litter_emis_year_window + soil_emis_year_window @@ -890,6 +928,6 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_emis_year, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_emis_year) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_2000) diff --git a/carbon_pools/mp_create_carbon_pools.py b/carbon_pools/mp_create_carbon_pools.py index e45d61c8..9481eb27 100644 --- a/carbon_pools/mp_create_carbon_pools.py +++ b/carbon_pools/mp_create_carbon_pools.py @@ -1,4 +1,4 @@ -''' +""" This script creates carbon pools in the year of loss (emitted-year carbon) and in 2000. 
For the year 2000, it creates aboveground, belowground, deadwood, litter, and total carbon emitted_pools (soil is
 created in a separate script but is brought in to create total carbon). All but total carbon are to the extent
@@ -18,53 +18,58 @@
 Which carbon emitted_pools are being generated (2000 and/or loss pixels) is controlled through the command line
 argument --carbon-pool-extent (-ce). This extent argument determines which AGC function is used and how the outputs of the other emitted_pools' scripts are named.
 Carbon emitted_pools in both 2000 and in the year of loss can be created in a single run by using '2000,loss' or 'loss,2000'.
-'''
-import multiprocessing
-import pandas as pd
-from subprocess import Popen, PIPE, STDOUT, check_call
-import datetime
-import glob
-import os
+python mp_create_carbon_pools.py -t std -l 00N_000E -d 20229999 -si -nu -ce loss
+"""
+
 import argparse
 from functools import partial
+import glob
+import multiprocessing
+import os
+import pandas as pd
 import sys
+
 sys.path.append('../')
 import constants_and_names as cn
 import universal_util as uu
 sys.path.append(os.path.join(cn.docker_app,'carbon_pools'))
 import create_carbon_pools

-def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None, no_upload = None,
-                           save_intermediates = None):
+def mp_create_carbon_pools(tile_id_list, carbon_pool_extent):
+    """
+    :param tile_id_list: list of tile ids to process
+    :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000
+    :return: set of tiles with each carbon pool density (Mg/ha): aboveground, belowground, dead wood, litter, soil, total
+    """

     os.chdir(cn.docker_base_dir)

-    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
-        uu.exception_log(no_upload, "Sensitivity analysis run must use 'loss' extent")
+    if (cn.SENSIT_TYPE != 'std') & (carbon_pool_extent != 'loss'):
+        uu.exception_log("Sensitivity analysis run must use loss extent")

     # Checks the validity of the carbon_pool_extent argument
     if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
-        uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")
-
+        uu.exception_log('Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.')

     # If a full model run is specified, the correct set of tiles for the particular script is listed.
     # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant
     # because there must be loss pixels for emissions-year carbon pools to exist.
     if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
         # Lists the tiles that have both model extent and loss pixels, both being necessary precursors for emissions
-        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
-        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
-        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
+        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=cn.SENSIT_TYPE)
+        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=cn.SENSIT_TYPE)
+        uu.print_log('Carbon pool at emissions year is combination of model_extent and loss tiles:')
         tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

     # For runs generating carbon pools in 2000, all model extent tiles are relevant. 
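(The emissions-year tile list built above is just the set intersection of the model-extent and loss listings; for example:)

    model_extent_tiles = ['00N_000E', '00N_010E', '10S_020E']
    loss_tiles = ['00N_000E', '10S_020E', '20N_030W']

    # Only tiles present in both listings can have emissions-year carbon pools
    tile_id_list = list(set(model_extent_tiles).intersection(loss_tiles))
    print(sorted(tile_id_list))  # ['00N_000E', '10S_020E']
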
if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'): - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process' + "\n") + output_dir_list = [] output_pattern_list = [] @@ -91,15 +96,15 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da } # Adds the correct AGB tiles to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -129,15 +134,15 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da } # Adds the correct AGB tiles to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -145,24 +150,24 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) else: - uu.print_log("Output directory list for standard model:", output_dir_list) + uu.print_log(f'Output directory list for standard model: {output_dir_list}') # A date can 
optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] @@ -172,7 +177,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da pd.options.mode.chained_assignment = None # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), + gain_table = pd.read_excel(f'{cn.gain_spreadsheet}', sheet_name="mangrove gain, for model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) @@ -193,11 +198,11 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang) - uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent)) + uu.print_log(f'Creating tiles of aboveground carbon in {carbon_pool_extent}') if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 16 # 16 processors = XXX GB peak else: processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; @@ -206,19 +211,19 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak else: processes = 2 - uu.print_log('AGC loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_AGC, - sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'AGC loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_AGC, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload) + # create_carbon_pools.create_AGC(tile_id, carbon_pool_extent) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -228,25 +233,25 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles") - tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) - uu.print_log(" 
Deleting", len(tiles_to_delete), "tiles...") + uu.print_log(':::::Freeing up memory for belowground carbon creation; deleting unneeded tiles') + tiles_to_delete = glob.glob(f'*{cn.pattern_annual_gain_AGC_all_types}*tif') + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_all_types}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent)) + uu.print_log(f'Creating tiles of belowground carbon in {carbon_pool_extent}') # Creates a single filename pattern to pass to the multiprocessor call if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 30 # 30 processors = XXX GB peak else: processes = 39 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak @@ -254,20 +259,20 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak else: processes = 2 - uu.print_log('BGC max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, - carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'BGC max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) + # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) @@ -282,26 +287,26 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. 
if '2000' in carbon_pool_extent: - uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000))) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_BGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gain}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_soil_C_full_extent_2000}*tif')) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent)) + uu.print_log(f'Creating tiles of deadwood and litter carbon in {carbon_pool_extent}') if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); @@ -315,22 +320,21 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak else: processes = 2 - uu.print_log('Deadwood and litter max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map( - partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, - mang_litter_AGB_ratio=mang_litter_AGB_ratio, - carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Deadwood and litter max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, + mang_litter_AGB_ratio=mang_litter_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) + # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood @@ -343,26 +347,26 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() - if not 
save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_elevation}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_precip}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_JPL_unmasked_processed}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cont_eco_processed}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() if 'loss' in carbon_pool_extent: - uu.print_log("Creating tiles of soil carbon in loss extent") + uu.print_log('Creating tiles of soil carbon in loss extent') # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. @@ -374,7 +378,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 36 # 36 processors = XXX GB peak else: processes = 44 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak @@ -382,19 +386,19 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da processes = 12 # 12 processors = XXX GB peak else: processes = 2 - uu.print_log('Soil carbon loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Soil carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload) + # create_carbon_pools.create_soil_emis_extent(tile_id, pattern) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. 
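The same refactor pattern recurs in every hunk above: per-call sensit_type/no_upload keyword arguments are replaced by module-level globals on constants_and_names (set once from argparse in __main__), and bare multiprocessing.Pool objects become context managers. A minimal sketch of the idea, with hypothetical stand-ins for the model's worker functions:

import multiprocessing
from functools import partial

SENSIT_TYPE = 'std'  # stands in for cn.SENSIT_TYPE, assigned once from argparse in __main__

def create_tile(tile_id, carbon_pool_extent):
    # Worker reads the run configuration from the module global rather than a per-call argument
    return f'{tile_id}: {carbon_pool_extent} ({SENSIT_TYPE})'

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E']
    with multiprocessing.Pool(2) as pool:
        results = pool.map(partial(create_tile, carbon_pool_extent='loss'), tile_id_list)
    print(results)

Two caveats worth noting: Pool.__exit__ calls terminate() rather than close(), so the explicit close()/join() retained inside the with blocks above is harmless and still gives an orderly shutdown; and globals reassigned after import are only inherited by workers on platforms that fork, so under spawn (the Windows default) each worker would see the module defaults again.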
@@ -406,7 +410,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() if '2000' in carbon_pool_extent: - uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.") + uu.print_log('Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.') uu.check_storage() @@ -422,16 +426,16 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da } for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) - uu.print_log("Creating tiles of total carbon") + uu.print_log('Creating tiles of total carbon') if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 14 # 14 processors = XXX GB peak else: processes = 19 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak @@ -439,19 +443,19 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da processes = 12 # 12 processors = XXX GB peak else: processes = 2 - uu.print_log('Total carbon loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Total carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload) + # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) @@ -468,7 +472,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da parser = argparse.ArgumentParser( description='Creates tiles of carbon pool densities in the year of loss or in 2000') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--carbon_pool_extent', '-ce', required=True, @@ -480,25 +484,25 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SAVE_INTERMEDIATES = args.save_intermediates + cn.CARBON_POOL_EXTENT = args.carbon_pool_extent # Tells the pool creation functions to calculate carbon emitted_pools as they were at the year of loss in loss pixels only + tile_id_list = args.tile_id_list - carbon_pool_extent = args.carbon_pool_extent # Tells the pool creation functions to calculate carbon emitted_pools as they were at the year of loss in loss pixels only - run_date = args.run_date - no_upload = args.no_upload - save_intermediates = args.save_intermediates # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, - carbon_pool_extent=carbon_pool_extent, no_upload=no_upload, save_intermediates=save_intermediates) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_create_carbon_pools(sensit_type=sensit_type, tile_id_list=tile_id_list, - carbon_pool_extent=carbon_pool_extent, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates) + mp_create_carbon_pools(tile_id_list, cn.CARBON_POOL_EXTENT) diff --git a/carbon_pools/mp_create_soil_C.py b/carbon_pools/mp_create_soil_C.py index 30773b52..af689fe8 100644 --- a/carbon_pools/mp_create_soil_C.py +++ b/carbon_pools/mp_create_soil_C.py @@ -41,7 +41,7 @@ def mp_create_soil_C(tile_id_list, no_upload=None): ) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # List of output directories and output file name patterns @@ -96,7 +96,7 @@ def mp_create_soil_C(tile_id_list, no_upload=None): # # create_soil_C.create_mangrove_soil_C(tile_id, no_Upload) - uu.print_log('Done making mangrove soil C tiles', '\n') + uu.print_log('Done making mangrove soil C tiles', "\n") uu.print_log("Making mineral soil C vrt...") check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(cn.pattern_mineral_soil_C_raw), shell=True) @@ -112,8 +112,8 @@ def mp_create_soil_C(tile_id_list, no_upload=None): processes = int(cn.count/2) uu.print_log("Creating mineral soil C density tiles with {} processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() @@ -236,8 +236,8 @@ def mp_create_soil_C(tile_id_list, no_upload=None): processes = 2 uu.print_log("Creating mineral soil C stock stdev tiles with {} 
processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() @@ -291,14 +291,14 @@ def mp_create_soil_C(tile_id_list, no_upload=None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list, run_date=run_date) + uu.initiate_log(tile_id_list) tile_id_list = uu.tile_id_list_check(tile_id_list) mp_create_soil_C(tile_id_list=tile_id_list, no_upload=no_upload) \ No newline at end of file diff --git a/constants_and_names.py b/constants_and_names.py index 8b1fda7d..4491237e 100644 --- a/constants_and_names.py +++ b/constants_and_names.py @@ -8,10 +8,39 @@ ######## ######## # Model version -version = '1.2.2' +version = '1.2.3' version_filename = version.replace('.', '_') +# Global variables that can be modified by the command line +global NO_UPLOAD +NO_UPLOAD = False +global SENSIT_TYPE +SENSIT_TYPE = 'std' +global RUN_DATE +RUN_DATE = None +global STAGE_INPUT +STAGE_INPUT = '' +global RUN_THROUGH +RUN_THROUGH = True +global CARBON_POOL_EXTENT +CARBON_POOL_EXTENT = '' +global EMITTED_POOLS +EMITTED_POOLS = '' +global THRESH +THRESH = '' +global STD_NET_FLUX +STD_NET_FLUX = '' +global INCLUDE_MANGROVES +INCLUDE_MANGROVES = False +global INCLUDE_US +INCLUDE_US = False +global SAVE_INTERMEDIATES +SAVE_INTERMEDIATES = True +global LOG_NOTE +LOG_NOTE = '' + + # Number of years of tree cover loss. If input loss raster is changed, this must be changed, too. 
loss_years = 21 diff --git a/removals/continent_ecozone_tiles.py b/data_prep/continent_ecozone_tiles.py similarity index 100% rename from removals/continent_ecozone_tiles.py rename to data_prep/continent_ecozone_tiles.py diff --git a/carbon_pools/create_inputs_for_C_pools.py b/data_prep/create_inputs_for_C_pools.py similarity index 100% rename from carbon_pools/create_inputs_for_C_pools.py rename to data_prep/create_inputs_for_C_pools.py diff --git a/data_prep/model_extent.py b/data_prep/model_extent.py index c32709f4..6f2d6ece 100644 --- a/data_prep/model_extent.py +++ b/data_prep/model_extent.py @@ -1,47 +1,56 @@ +""" +Function to create model extent tiles +""" + import datetime import numpy as np import os import rasterio -import logging import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu # @uu.counter -def model_extent(tile_id, pattern, sensit_type, no_upload): +def model_extent(tile_id, pattern): + """ + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: tile where pixels = 1 are included in the model and pixels = 0 are not included in the model + """ # I don't know why, but this needs to be here and not just in mp_model_extent os.chdir(cn.docker_base_dir) - uu.print_log("Delineating model extent:", tile_id) + uu.print_log(f'Delineating model extent: {tile_id}') # Start time start = datetime.datetime.now() # Names of the input tiles - mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - pre_2000_plantations = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000) + mangrove = f'{tile_id}_{cn.pattern_mangrove_biomass_2000}.tif' + gain = f'{cn.pattern_gain}_{tile_id}.tif' + pre_2000_plantations = f'{tile_id}_{cn.pattern_plant_pre_2000}.tif' # Tree cover tile name depends on the sensitivity analysis. 
# PRODES extent 2000 stands in for Hansen TCD - if sensit_type == 'legal_Amazon_loss': - tcd = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) - uu.print_log("Using PRODES extent 2000 tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + tcd = f'{tile_id}_{cn.pattern_Brazil_forest_extent_2000_processed}.tif' + uu.print_log(f'Using PRODES extent 2000 tile {tile_id} for {cn.SENSIT_TYPE} sensitivity analysis') else: - tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id) - uu.print_log("Using Hansen tcd tile {0} for {1} model run".format(tile_id, sensit_type)) + tcd = f'{cn.pattern_tcd}_{tile_id}.tif' + uu.print_log(f'Using Hansen tcd tile {tile_id} for {cn.SENSIT_TYPE} model run') # Biomass tile name depends on the sensitivity analysis - if sensit_type == 'biomass_swap': - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) + if cn.SENSIT_TYPE == 'biomass_swap': + biomass = f'{tile_id}_{cn.pattern_JPL_unmasked_processed}.tif' + uu.print_log(f'Using JPL biomass tile {tile_id} for {cn.SENSIT_TYPE} sensitivity analysis') else: - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile {0} for {1} model run".format(tile_id, sensit_type)) + biomass = f'{tile_id}_{cn.pattern_WHRC_biomass_2000_unmasked}.tif' + uu.print_log(f'Using WHRC biomass tile {tile_id} for {cn.SENSIT_TYPE} model run') - out_tile = '{0}_{1}.tif'.format(tile_id, pattern) + out_tile = f'{tile_id}_{pattern}.tif' # Opens tree cover density tile with rasterio.open(tcd) as tcd_src: @@ -63,37 +72,37 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # Checks whether each input tile exists try: mangroves_src = rasterio.open(mangrove) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) - except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) + uu.print_log(f' Mangrove tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No mangrove tile found for {tile_id}') try: gain_src = rasterio.open(gain) - uu.print_log(" Gain tile found for {}".format(tile_id)) - except: - uu.print_log(" No gain tile found for {}".format(tile_id)) + uu.print_log(f' Gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No gain tile found for {tile_id}') try: biomass_src = rasterio.open(biomass) - uu.print_log(" Biomass tile found for {}".format(tile_id)) - except: - uu.print_log(" No biomass tile found for {}".format(tile_id)) + uu.print_log(f' Biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No biomass tile found for {tile_id}') try: pre_2000_plantations_src = rasterio.open(pre_2000_plantations) - uu.print_log(" Pre-2000 plantation tile found for {}".format(tile_id)) - except: - uu.print_log(" No pre-2000 plantation tile found for {}".format(tile_id)) + uu.print_log(f' Pre-2000 plantation tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No pre-2000 plantation tile found for {tile_id}') # Opens the output tile, giving it the metadata of the input tiles dst = rasterio.open(out_tile, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) + uu.add_universal_metadata_rasterio(dst) dst.update_tags( units='unitless. 1 = in model extent. 
0 = not in model extent') - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': dst.update_tags( source='Pixels with ((Hansen 2000 tree cover AND NASA JPL AGB2000) OR Hansen gain OR mangrove biomass 2000) NOT pre-2000 plantations') else: @@ -103,7 +112,7 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): extent='Full model extent. This defines which pixels are included in the model.') - uu.print_log(" Creating model extent for {}".format(tile_id)) + uu.print_log(f' Creating model extent for {tile_id}') uu.check_memory() @@ -115,30 +124,30 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # If the tile does not exist, it creates an array of 0s. try: mangrove_window = mangroves_src.read(1, window=window).astype('uint8') - except: + except UnboundLocalError: mangrove_window = np.zeros((window.height, window.width), dtype=int) try: gain_window = gain_src.read(1, window=window) - except: + except UnboundLocalError: gain_window = np.zeros((window.height, window.width), dtype=int) try: biomass_window = biomass_src.read(1, window=window) - except: + except UnboundLocalError: biomass_window = np.zeros((window.height, window.width), dtype=int) try: tcd_window = tcd_src.read(1, window=window) - except: + except UnboundLocalError: tcd_window = np.zeros((window.height, window.width), dtype=int) try: pre_2000_plantations_window = pre_2000_plantations_src.read(1, window=window) - except: + except UnboundLocalError: pre_2000_plantations_window = np.zeros((window.height, window.width), dtype=int) # Array of pixels that have both biomass and tree cover density tcd_with_biomass_window = np.where((biomass_window > 0) & (tcd_window > 0), 1, 0) # For all model types except legal_Amazon_loss sensitivity analysis - if sensit_type != 'legal_Amazon_loss': + if cn.SENSIT_TYPE != 'legal_Amazon_loss': # Array of pixels with (biomass AND tcd) OR mangrove biomass OR Hansen gain forest_extent = np.where((tcd_with_biomass_window == 1) | (mangrove_window > 1) | (gain_window == 1), 1, 0) @@ -156,7 +165,5 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # Writes the output window to the output dst.write_band(1, forest_extent, window=window) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/removals/mp_continent_ecozone_tiles.py b/data_prep/mp_continent_ecozone_tiles.py similarity index 96% rename from removals/mp_continent_ecozone_tiles.py rename to data_prep/mp_continent_ecozone_tiles.py index b513deb9..23330ccb 100644 --- a/removals/mp_continent_ecozone_tiles.py +++ b/data_prep/mp_continent_ecozone_tiles.py @@ -37,7 +37,7 @@ def mp_continent_ecozone_tiles(tile_id_list, run_date = None): tile_id_list = uu.create_combined_tile_list(cn.pattern_WHRC_biomass_2000_non_mang_non_planted, cn.mangrove_biomass_2000_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # if the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped @@ -88,6 +88,6 @@ def mp_continent_ecozone_tiles(tile_id_list, run_date = None): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date) + uu.initiate_log(tile_id_list) mp_continent_ecozone_tiles(tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of 
file diff --git a/carbon_pools/mp_create_inputs_for_C_pools.py b/data_prep/mp_create_inputs_for_C_pools.py similarity index 97% rename from carbon_pools/mp_create_inputs_for_C_pools.py rename to data_prep/mp_create_inputs_for_C_pools.py index 72596b67..3248bf8c 100644 --- a/carbon_pools/mp_create_inputs_for_C_pools.py +++ b/data_prep/mp_create_inputs_for_C_pools.py @@ -86,13 +86,13 @@ def mp_create_inputs_for_C_pools(tile_id_list, run_date = None, no_upload = None args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) mp_create_inputs_for_C_pools(tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/data_prep/mp_mangrove_processing.py b/data_prep/mp_mangrove_processing.py index 0b9bc2ba..ccc81c38 100644 --- a/data_prep/mp_mangrove_processing.py +++ b/data_prep/mp_mangrove_processing.py @@ -23,7 +23,7 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads zipped raw mangrove files @@ -46,13 +46,13 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): processes=int(cn.count/4) uu.print_log('Mangrove preprocessing max processors=', processes) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # # For single processor use, for testing purposes # for tile_id in tile_id_list: # - # mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload) + # mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern) # Checks if each tile has data in it. Only tiles with data are uploaded. upload_dir = cn.mangrove_biomass_2000_dir @@ -76,13 +76,13 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) mp_mangrove_processing(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/data_prep/mp_model_extent.py b/data_prep/mp_model_extent.py index 67a05680..507a8a82 100644 --- a/data_prep/mp_model_extent.py +++ b/data_prep/mp_model_extent.py @@ -1,21 +1,18 @@ -''' +""" This script creates a binary raster of the model extent at the pixel level. The model extent is ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations The rest of the model uses this to mask its extent. For biomass_swap sensitivity analysis, NASA JPL AGB 2000 replaces WHRC 2000. 
For legal_Amazon_loss sensitivity analysis, PRODES 2000 forest extent replaces Hansen tree cover 2000 and Hansen gain pixels and mangrove pixels outside of (PRODES extent AND WHRC AGB) are not included. -''' +""" - -import multiprocessing -from functools import partial -import pandas as pd -import datetime import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +from functools import partial +import multiprocessing import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu @@ -23,17 +20,21 @@ import model_extent -def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_model_extent(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 1 set of tiles where pixels = 1 are included in the model and pixels = 0 are not included in the model + """ os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis - if sensit_type == 'biomass_swap': - tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type) - elif sensit_type == 'legal_Amazon_loss': - tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type) + if cn.SENSIT_TYPE == 'biomass_swap': + tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, cn.SENSIT_TYPE) + elif cn.SENSIT_TYPE == 'legal_Amazon_loss': + tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, cn.SENSIT_TYPE) else: tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, @@ -41,7 +42,7 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None ) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. 
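A note on the narrowed exception handling in model_extent.py above: rasterio.open() raises RasterioIOError when an optional input tile is absent, and because the corresponding *_src name is then never bound, the later windowed read fails with UnboundLocalError, at which point a block of zeros stands in for the missing input. A minimal sketch of that pattern (tile name hypothetical):

import numpy as np
import rasterio
from rasterio.windows import Window

try:
    mangroves_src = rasterio.open('00N_000E_mangrove_agb_2000.tif')  # hypothetical tile
except rasterio.errors.RasterioIOError:
    print('No mangrove tile found; zeros will be substituted')

window = Window(0, 0, 1024, 1024)
try:
    mangrove_window = mangroves_src.read(1, window=window)
except UnboundLocalError:
    # open() failed above, so the name `mangroves_src` was never bound
    mangrove_window = np.zeros((window.height, window.width), dtype=int)

Catching these two specific exceptions, rather than a bare except:, keeps genuinely unexpected errors (bad CRS, corrupt tile, keyboard interrupt) from being silently converted into all-zero inputs.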
@@ -51,12 +52,12 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000] } - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed] else: download_dict[cn.tcd_dir] = [cn.pattern_tcd] - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] @@ -68,22 +69,22 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] @@ -91,7 +92,7 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 38 else: processes = 45 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); @@ -99,36 +100,35 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None else: processes = 3 uu.print_log('Model extent processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + with multiprocessing.Pool(processes) as pool: + pool.map(partial(model_extent.model_extent, pattern=pattern), tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # model_extent.model_extent(tile_id, pattern, sensit_type, no_upload) + # model_extent.model_extent(tile_id, pattern) output_pattern = output_pattern_list[0] if cn.count <= 2: # For local tests processes = 1 - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: processes = 58 # 50 processors = 620 GB peak; 55 = 640 GB; 58 = 650 GB (continues to increase very slowly several hundred tiles in) - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -140,7 +140,7 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None parser = argparse.ArgumentParser( description='Create tiles of the pixels included in the model (model extent)') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) 
+ help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -148,21 +148,23 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_model_extent(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) - + mp_model_extent(tile_id_list=tile_id_list) diff --git a/emissions/mp_peatland_processing.py b/data_prep/mp_peatland_processing.py similarity index 96% rename from emissions/mp_peatland_processing.py rename to data_prep/mp_peatland_processing.py index 84bcda9d..f02cd16c 100644 --- a/emissions/mp_peatland_processing.py +++ b/data_prep/mp_peatland_processing.py @@ -29,7 +29,7 @@ def mp_peatland_processing(tile_id_list, run_date = None): tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # List of output directories and output file name patterns @@ -112,7 +112,7 @@ def mp_peatland_processing(tile_id_list, run_date = None): sensit_type='std' # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the tile_id_list argument is valid tile_id_list = uu.tile_id_list_check(tile_id_list) diff --git a/data_prep/mp_plantation_preparation.py b/data_prep/mp_plantation_preparation.py index 54d6f47f..60002610 100644 --- a/data_prep/mp_plantation_preparation.py +++ b/data_prep/mp_plantation_preparation.py @@ -477,7 +477,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Creates the directory and shapefile names for the two possible arguments (index shapefiles) gadm_index = os.path.split(args.gadm_tile_index) @@ -494,7 +494,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) diff --git a/data_prep/mp_prep_other_inputs.py 
b/data_prep/mp_prep_other_inputs.py index de38b6f2..b2242a65 100644 --- a/data_prep/mp_prep_other_inputs.py +++ b/data_prep/mp_prep_other_inputs.py @@ -34,7 +34,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): ) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") ''' Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. @@ -80,7 +80,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) @@ -130,8 +130,8 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): processes = int(cn.count/2) uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() @@ -146,7 +146,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating young natural forest removals rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -160,7 +160,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -198,7 +198,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating European natural forest removals rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -212,7 +212,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating standard deviation for European natural forest removals rate tiles with {} processors...".format(processes)) # pool = 
multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -233,7 +233,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating primary forest tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -261,7 +261,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest age category tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -275,7 +275,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest group tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -289,7 +289,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest region tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # @@ -331,7 +331,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() - uu.print_log('\n') + uu.print_log("\n") # Uploads output tiles to s3 @@ -352,14 +352,14 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the tile_id_list argument is valid tile_id_list = uu.tile_id_list_check(tile_id_list) diff --git a/data_prep/mp_rewindow_tiles.py b/data_prep/mp_rewindow_tiles.py index 1c82d794..b7e991d6 100644 --- a/data_prep/mp_rewindow_tiles.py +++ b/data_prep/mp_rewindow_tiles.py @@ -78,8 +78,8 @@ 
def mp_rewindow_tiles(tile_id_list, run_date = None, no_upload = None): processes = 8 uu.print_log('Rewindow max processors=', processes) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name), + tile_id_list) pool.close() pool.join() @@ -112,14 +112,14 @@ def mp_rewindow_tiles(tile_id_list, run_date = None, no_upload = None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the tile_id_list argument is valid tile_id_list = uu.tile_id_list_check(tile_id_list) diff --git a/emissions/peatland_processing.py b/data_prep/peatland_processing.py similarity index 99% rename from emissions/peatland_processing.py rename to data_prep/peatland_processing.py index 9e9a6499..56b8b9ff 100644 --- a/emissions/peatland_processing.py +++ b/data_prep/peatland_processing.py @@ -98,7 +98,7 @@ def create_peat_mask_tiles(tile_id): out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(out_tile_tagged, 'std') + uu.add_universal_metadata_rasterio(out_tile_tagged) out_tile_tagged.update_tags( key='1 = peat. 0 = not peat.') out_tile_tagged.update_tags( diff --git a/emissions/calculate_gross_emissions.py b/emissions/calculate_gross_emissions.py index f4fa95c8..410d8c59 100644 --- a/emissions/calculate_gross_emissions.py +++ b/emissions/calculate_gross_emissions.py @@ -1,17 +1,28 @@ -from subprocess import Popen, PIPE, STDOUT, check_call +""" +Function to call C++ executable that calculates gross emissions +""" + import datetime -import rasterio -from shutil import copyfile -import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu -# Calls the c++ script to calculate gross emissions -def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload): - - uu.print_log("Calculating gross emissions for", tile_id, "using", sensit_type, "model type...") +def calc_emissions(tile_id, emitted_pools, folder): + """ + Calls the c++ script to calculate gross emissions + :param tile_id: tile to be processed, identified by its tile id + :param emitted_pools: Whether emissions from soil only is calculated, or emissions from biomass and soil. + Options are: soil_only or biomass_soil. + :param folder: directory where the input tiles are stored (passed through to the C++ executable) + :return: 10 tiles: 6 tiles with emissions for each driver; CO2 emissions from all drivers; + non-CO2 emissions from all drivers; all gases (CO2 and non-CO2 from all drivers); + emissions decision tree nodes (used for QC). + Units: Mg CO2e/ha over entire model period. + """ + + uu.print_log(f'Calculating gross emissions for {tile_id} using {cn.SENSIT_TYPE} model type...') start = datetime.datetime.now() @@ -20,49 +31,35 @@ def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload): # Runs the correct c++ script given the emitted_pools (biomass+soil or soil_only) and model type selected. # soil_only, no_shifting_ag, and convert_to_grassland have special gross emissions C++ scripts. 
# The other sensitivity analyses and the standard model all use the same gross emissions C++ script. - if (emitted_pools == 'soil_only') & (sensit_type == 'std'): - cmd = ['{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder] + if (emitted_pools == 'soil_only') & (cn.SENSIT_TYPE == 'std'): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_soil_only.exe', tile_id, cn.SENSIT_TYPE, folder] - elif (emitted_pools == 'biomass_soil') & (sensit_type in ['convert_to_grassland', 'no_shifting_ag']): - cmd = ['{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type), tile_id, sensit_type, folder] + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE in ['convert_to_grassland', 'no_shifting_ag']): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_{cn.SENSIT_TYPE}.exe', tile_id, cn.SENSIT_TYPE, folder] # This C++ script has an extra argument that names the input carbon emitted_pools and output emissions correctly - elif (emitted_pools == 'biomass_soil') & (sensit_type not in ['no_shifting_ag', 'convert_to_grassland']): - cmd = ['{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder] + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE not in ['no_shifting_ag', 'convert_to_grassland']): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_generic.exe', tile_id, cn.SENSIT_TYPE, folder] else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid') + uu.exception_log('Pool and/or sensitivity analysis option not valid') uu.log_subprocess_output_full(cmd) # Identifies which pattern to use for counting tile completion pattern = cn.pattern_gross_emis_commod_biomass_soil - if (emitted_pools == 'biomass_soil') & (sensit_type == 'std'): + if (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE == 'std'): pattern = pattern - elif (emitted_pools == 'biomass_soil') & (sensit_type != 'std'): - pattern = pattern + "_" + sensit_type + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE != 'std'): + pattern = pattern + "_" + cn.SENSIT_TYPE elif emitted_pools == 'soil_only': pattern = pattern.replace('biomass_soil', 'soil_only') else: - uu.exception_log(no_upload, 'Pool option not valid') + uu.exception_log('Pool option not valid') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) - - -# Adds metadata tags to the output rasters -def add_metadata_tags(tile_id, pattern, sensit_type): - - # Adds metadata tags to output rasters - uu.add_universal_metadata_tags('{0}_{1}.tif'.format(tile_id, pattern), sensit_type) - - cmd = ['gdal_edit.py', '-mo', - 'units=Mg CO2e/ha over model duration (2001-20{})'.format(cn.loss_years), - '-mo', 'source=many data sources', - '-mo', 'extent=Tree cover loss pixels within model extent (and tree cover loss driver, if applicable)', - '{0}_{1}.tif'.format(tile_id, pattern)] - uu.log_subprocess_output_full(cmd) + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/emissions/mp_calculate_gross_emissions.py b/emissions/mp_calculate_gross_emissions.py index 2d50ec8e..ff1c1093 100644 --- a/emissions/mp_calculate_gross_emissions.py +++ b/emissions/mp_calculate_gross_emissions.py @@ -1,4 +1,4 @@ -''' +""" This script calculates the gross emissions in tonnes CO2e/ha for every loss pixel. The properties of each pixel determine the appropriate emissions equation, the constants for the equation, and the carbon pool values that go into the equation. 
@@ -20,21 +20,30 @@ Emissions from all drivers is also output as emissions due to CO2 only and emissions due to other GHG (CH4 and N2O). The other output shows which branch of the decision tree that determines the emissions equation applies to each pixel. These codes are summarized in carbon-budget/emissions/node_codes.txt -''' +""" -import multiprocessing import argparse -import datetime -import os from functools import partial +import multiprocessing +import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu sys.path.append(os.path.join(cn.docker_app,'emissions')) import calculate_gross_emissions -def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date = None, no_upload = None): +def mp_calculate_gross_emissions(tile_id_list, emitted_pools): + """ + :param tile_id_list: list of tile ids to process + :param emitted_pools: Whether emissions from soil only is calculated, or emissions from biomass and soil. + Options are: soil_only or biomass_soil. + :return: 10 sets of tiles: 6 sets of tiles with emissions for each driver; CO2 emissions from all drivers; + non-CO2 emissions from all drivers; all gases (CO2 and non-CO2 from all drivers); + emissions decision tree nodes (used for QC). + Units: Mg CO2e/ha over entire model period. + """ os.chdir(cn.docker_base_dir) @@ -44,10 +53,10 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # If the tile_list argument is an s3 folder, the list of tiles in it is created if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script @@ -67,9 +76,9 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d } # Special loss tiles for the Brazil and Mekong sensitivity analyses - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -77,7 +86,7 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # Checks the validity of the emitted_pools argument if (emitted_pools not in ['soil_only', 'biomass_soil']): - uu.exception_log(no_upload, 'Invalid pool input. Please choose soil_only or biomass_soil.') + uu.exception_log('Invalid pool input. Please choose soil_only or biomass_soil.') # Checks if the correct c++ script has been compiled for the pool option selected @@ -108,21 +117,21 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. 
- if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: - # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)): - if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)): - uu.print_log("C++ for {} already compiled.".format(sensit_type)) + if cn.SENSIT_TYPE in ['no_shifting_ag', 'convert_to_grassland']: + # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(cn.SENSIT_TYPE)): + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_{cn.SENSIT_TYPE}.exe'): + uu.print_log(f'C++ for {cn.SENSIT_TYPE} already compiled.') else: - uu.exception_log(no_upload, 'Must compile {} model C++...'.format(sensit_type)) + uu.exception_log(f'Must compile {cn.SENSIT_TYPE} model C++...') else: - if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_generic.exe'): + uu.print_log('C++ for generic emissions already compiled.') else: - uu.exception_log(no_upload, 'Must compile generic emissions C++...') + uu.exception_log('Must compile generic emissions C++...') - elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): - if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for soil_only already compiled.") + elif (emitted_pools == 'soil_only') & (cn.SENSIT_TYPE == 'std'): + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_soil_only.exe'): + uu.print_log('C++ for soil_only already compiled.') # Output file directories for soil_only. Must be in same order as output pattern directories. output_dir_list = [cn.gross_emis_commod_soil_only_dir, @@ -148,30 +157,30 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d cn.pattern_gross_emis_nodes_soil_only] else: - uu.exception_log(no_upload, 'Must compile soil_only C++...') + uu.exception_log('Must compile soil_only C++...') else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid') + uu.exception_log('Pool and/or sensitivity analysis option not valid') # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key - pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + directory = key + output_pattern = values[0] + uu.s3_flexible_download(directory, output_pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
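    # For example (illustrative date), a run launched with "-d 20220101" would have
    # uu.replace_output_dir_date() below swap the date portion of each path in output_dir_list
    # for 20220101 before anything is uploaded.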
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) uu.print_log(output_pattern_list) @@ -181,7 +190,7 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles. # That way, the C++ script gets all the necessary input files. # If it doesn't get the necessary inputs, it skips that tile. - uu.print_log("Making blank tiles for inputs that don't currently exist") + uu.print_log('Making blank tiles for inputs that do not currently exist') # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools pattern_list = [cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask, cn.pattern_ifl_primary, cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year, cn.pattern_climate_zone, @@ -192,70 +201,71 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() - for pattern in pattern_list: - pool = multiprocessing.Pool(processes=80) # 60 = 100 GB peak; 80 = XXX GB peak - pool.map(partial(uu.make_blank_tile, pattern=pattern, folder=folder, - sensit_type=sensit_type), tile_id_list) - pool.close() - pool.join() + processes=80 # 60 = 100 GB peak; 80 = XXX GB peak + for output_pattern in pattern_list: + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.make_blank_tile, pattern=output_pattern, folder=folder), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for pattern in pattern_list: # for tile in tile_id_list: - # uu.make_blank_tile(tile, pattern, folder, sensit_type) + # uu.make_blank_tile(tile, pattern, folder) # Calculates gross emissions for each tile # count/4 uses about 390 GB on a r4.16xlarge spot machine. # processes=18 uses about 440 GB on an r4.16xlarge spot machine. 
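    # (Illustrative arithmetic from the peaks noted above: ~440 GB / 18 processes is roughly
    # 24 GB per gross emissions process, so the pool sizes below are capped by peak memory use
    # rather than by the cn.count core count.)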
if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 15 # 15 processors = XXX GB peak else: processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak else: processes = 9 - uu.print_log('Gross emissions max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, sensit_type=sensit_type, - folder=folder, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Gross emissions max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, + folder=folder), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile in tile_id_list: - # calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder, no_upload) + # calculate_gross_emissions.calc_emissions(tile, emitted_pools, folder) # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() - for i in range(0, len(output_pattern_list)): - pattern = output_pattern_list[i] + for i, output_pattern in enumerate(output_pattern_list): - uu.print_log("Adding metadata tags for pattern {}".format(pattern)) + uu.print_log(f'Adding metadata tags for pattern {output_pattern}') if cn.count == 96: processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak else: processes = 9 - uu.print_log('Adding metadata tags max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(calculate_gross_emissions.add_metadata_tags, pattern=pattern, sensit_type=sensit_type), - tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Adding metadata tags max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.add_emissions_metadata, output_pattern=output_pattern), + tile_id_list) + pool.close() + pool.join() # for tile_id in tile_id_list: - # calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type) + # calculate_gross_emissions.add_metadata_tags(tile_id, pattern) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -268,33 +278,34 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.EMITTED_POOLS = args.emitted_pools_to_use + tile_id_list = args.tile_id_list - emitted_pools = args.emitted_pools_to_use - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, - emitted_pools=emitted_pools, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') else: tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_calculate_gross_emissions(sensit_type=sensit_type, tile_id_list=tile_id_list, emitted_pools=emitted_pools, - run_date=run_date, no_upload=no_upload) + mp_calculate_gross_emissions(tile_id_list, cn.EMITTED_POOLS) diff --git a/removals/US_removal_rates.py b/removals/US_removal_rates.py index 116f2bb5..1eed68e9 100644 --- a/removals/US_removal_rates.py +++ b/removals/US_removal_rates.py @@ -18,7 +18,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g start = datetime.datetime.now() # Names of the input tiles - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{cn.pattern_gain}_{tile_id}.tif' US_age_cat = '{0}_{1}.tif'.format(tile_id, cn.pattern_age_cat_natrl_forest_US) US_forest_group = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_forest_group_processed) US_region = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_regions_processed) @@ -51,7 +51,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g agc_bgc_stdev_dst = rasterio.open('{0}_{1}.tif'.format(tile_id, output_pattern_list[1]), 'w', **kwargs) # Adds metadata tags to the output rasters - uu.add_rasterio_tags(agc_bgc_rate_dst, 'std') + uu.add_universal_metadata_rasterio(agc_bgc_rate_dst) agc_bgc_rate_dst.update_tags( units='megagrams aboveground+belowground carbon/ha/yr') agc_bgc_rate_dst.update_tags( @@ -59,7 +59,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g agc_bgc_rate_dst.update_tags( extent='Continental USA. Applies to pixels for which an FIA region, FIA forest group, and Pan et al. 
forest age category are available or interpolated.') - uu.add_rasterio_tags(agc_bgc_stdev_dst, 'std') + uu.add_universal_metadata_rasterio(agc_bgc_stdev_dst) agc_bgc_stdev_dst.update_tags( units='standard deviation of removal factor, in megagrams aboveground+belowground carbon/ha/yr') agc_bgc_stdev_dst.update_tags( diff --git a/removals/annual_gain_rate_AGC_BGC_all_forest_types.py b/removals/annual_gain_rate_AGC_BGC_all_forest_types.py index 88702be4..71274a88 100644 --- a/removals/annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/removals/annual_gain_rate_AGC_BGC_all_forest_types.py @@ -1,46 +1,56 @@ +""" +Function to create removal factor tiles with all removal factor sources combined +""" + import datetime import numpy as np -import os import rasterio -import logging import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu -def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload): +def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list): + """ + :param tile_id: tile to be processed, identified by its tile id + :param output_pattern_list: patterns for output tile names + :return: 5 tiles: removal factor source, aboveground rate, belowground rate, aboveground+belowground rate, + standard deviation for aboveground rate (all removal factor sources combined) + Units: Mg carbon/ha/yr (including for standard deviation tiles) + """ - uu.print_log("Mapping removal rate source and AGB and BGB removal rates:", tile_id) + uu.print_log(f'Mapping removal rate source and AGB and BGB removal rates: {tile_id}') # Start time start = datetime.datetime.now() # Names of the input tiles # Removal factors - model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) - mangrove_AGB = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_AGB_mangrove) - mangrove_BGB = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_BGB_mangrove) - europe_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe) - plantations_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked) - us_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_US) - young_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_natrl_forest_young) - age_category = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_age_cat_IPCC) - ipcc_AGB_default = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGB_IPCC_defaults) + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) + mangrove_AGB = f'{tile_id}_{cn.pattern_annual_gain_AGB_mangrove}.tif' + mangrove_BGB = f'{tile_id}_{cn.pattern_annual_gain_BGB_mangrove}.tif' + europe_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe) + plantations_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked) + us_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_US) + young_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_natrl_forest_young) + age_category = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_age_cat_IPCC) + ipcc_AGB_default = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGB_IPCC_defaults) # Removal factor standard deviations - 
mangrove_AGB_stdev = '{0}_{1}.tif'.format(tile_id, cn.pattern_stdev_annual_gain_AGB_mangrove) - europe_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe) - plantations_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked) - us_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US) - young_AGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young) - ipcc_AGB_default_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults) + mangrove_AGB_stdev = f'{tile_id}_{cn.pattern_stdev_annual_gain_AGB_mangrove}.tif' + europe_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe) + plantations_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked) + us_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US) + young_AGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young) + ipcc_AGB_default_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults) # Names of the output tiles - removal_forest_type = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - annual_gain_AGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) - annual_gain_BGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2]) - annual_gain_AGC_BGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[3]) # Not used further in the model. Created just for reference. - stdev_annual_gain_AGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[4]) + removal_forest_type = f'{tile_id}_{output_pattern_list[0]}.tif' + annual_gain_AGC_all_forest_types = f'{tile_id}_{output_pattern_list[1]}.tif' + annual_gain_BGC_all_forest_types = f'{tile_id}_{output_pattern_list[2]}.tif' + annual_gain_AGC_BGC_all_forest_types = f'{tile_id}_{output_pattern_list[3]}.tif' # Not used further in the model. Created just for reference. 
+ stdev_annual_gain_AGC_all_forest_types = f'{tile_id}_{output_pattern_list[4]}.tif'      # Opens the model extent tile     with rasterio.open(model_extent) as model_extent_src: @@ -64,56 +74,56 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens             mangrove_AGB_src = rasterio.open(mangrove_AGB)             mangrove_BGB_src = rasterio.open(mangrove_BGB)             mangrove_AGB_stdev_src = rasterio.open(mangrove_AGB_stdev) -            uu.print_log("    Mangrove tiles (AGB and BGB) for {}".format(tile_id)) -        except: -            uu.print_log("    No mangrove tile for {}".format(tile_id)) +            uu.print_log(f'    Mangrove tiles (AGB and BGB) for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No mangrove tile for {tile_id}')          try:             europe_AGC_BGC_src = rasterio.open(europe_AGC_BGC)             europe_AGC_BGC_stdev_src = rasterio.open(europe_AGC_BGC_stdev) -            uu.print_log("    Europe removal factor tile for {}".format(tile_id)) -        except: -            uu.print_log("    No Europe removal factor tile for {}".format(tile_id)) +            uu.print_log(f'    Europe removal factor tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No Europe removal factor tile for {tile_id}')          try:             plantations_AGC_BGC_src = rasterio.open(plantations_AGC_BGC)             plantations_AGC_BGC_stdev_src = rasterio.open(plantations_AGC_BGC_stdev) -            uu.print_log("    Planted forest tile for {}".format(tile_id)) -        except: -            uu.print_log("    No planted forest tile for {}".format(tile_id)) +            uu.print_log(f'    Planted forest tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No planted forest tile for {tile_id}')          try:             us_AGC_BGC_src = rasterio.open(us_AGC_BGC)             us_AGC_BGC_stdev_src = rasterio.open(us_AGC_BGC_stdev) -            uu.print_log("    US removal factor tile for {}".format(tile_id)) -        except: -            uu.print_log("    No US removal factor tile for {}".format(tile_id)) +            uu.print_log(f'    US removal factor tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No US removal factor tile for {tile_id}')          try:             young_AGC_src = rasterio.open(young_AGC)             young_AGC_stdev_src = rasterio.open(young_AGC_stdev) -            uu.print_log("    Young forest removal factor tile for {}".format(tile_id)) -        except: -            uu.print_log("    No young forest removal factor tile for {}".format(tile_id)) +            uu.print_log(f'    Young forest removal factor tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No young forest removal factor tile for {tile_id}')          try:             age_category_src = rasterio.open(age_category) -            uu.print_log("    Age category tile for {}".format(tile_id)) -        except: -            uu.print_log("    No age category tile for {}".format(tile_id)) +            uu.print_log(f'    Age category tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No age category tile for {tile_id}')          try:             ipcc_AGB_default_src = rasterio.open(ipcc_AGB_default)             ipcc_AGB_default_stdev_src = rasterio.open(ipcc_AGB_default_stdev) -            uu.print_log("    IPCC default removal rate tile for {}".format(tile_id)) -        except: -            uu.print_log("    No IPCC default removal rate tile for {}".format(tile_id)) +            uu.print_log(f'    IPCC default removal rate tile for {tile_id}') +        except rasterio.errors.RasterioIOError: +            uu.print_log(f'    No IPCC default removal rate tile for {tile_id}')      # Opens the output tile, giving it the arguments of the input tiles     removal_forest_type_dst = rasterio.open(removal_forest_type, 'w', **kwargs)      # Adds metadata tags to the output raster -    uu.add_rasterio_tags(removal_forest_type_dst, sensit_type) +    uu.add_universal_metadata_rasterio(removal_forest_type_dst)
removal_forest_type_dst.update_tags( key='6: mangroves. 5: European-specific rates. 4: planted forests. 3: US-specific rates. 2: young (<20 year) secondary forests. 1: old (>20 year) secondary forests and primary forests. Priority goes to the highest number.') removal_forest_type_dst.update_tags( @@ -130,7 +140,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst = rasterio.open(stdev_annual_gain_AGC_all_forest_types, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_AGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_AGC_all_forest_types_dst) annual_gain_AGC_all_forest_types_dst.update_tags( units='megagrams aboveground carbon/ha/yr') annual_gain_AGC_all_forest_types_dst.update_tags( @@ -139,7 +149,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_BGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_BGC_all_forest_types_dst) annual_gain_BGC_all_forest_types_dst.update_tags( units='megagrams belowground carbon/ha/yr') annual_gain_BGC_all_forest_types_dst.update_tags( @@ -148,7 +158,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_AGC_BGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_AGC_BGC_all_forest_types_dst) annual_gain_AGC_BGC_all_forest_types_dst.update_tags( units='megagrams aboveground + belowground carbon/ha/yr') annual_gain_AGC_BGC_all_forest_types_dst.update_tags( @@ -157,7 +167,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(stdev_annual_gain_AGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(stdev_annual_gain_AGC_all_forest_types_dst) stdev_annual_gain_AGC_all_forest_types_dst.update_tags( units='standard deviation for removal factor, in terms of megagrams aboveground carbon/ha/yr') stdev_annual_gain_AGC_all_forest_types_dst.update_tags( @@ -165,7 +175,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst.update_tags( extent='Full model extent') - uu.print_log(" Creating removal model forest type tile, AGC removal factor tile, BGC removal factor tile, and AGC removal factor standard deviation tile for {}".format(tile_id)) + uu.print_log(f' Creating removal model forest type tile, AGC removal factor tile, BGC removal factor tile, and AGC removal factor standard deviation tile for {tile_id}') uu.check_memory() @@ -182,7 +192,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens try: age_category_window = age_category_src.read(1, window=window) - except: + except UnboundLocalError: age_category_window = np.zeros((window.height, window.width), dtype='uint8') # Lowest priority @@ -195,7 +205,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens # that don't have rates under this sensitivity analysis to still be included in the model. 
# Unfortunately, model_extent is slightly different from the IPCC rate extent (no IPCC rates where # there is no ecozone information), but this is a very small difference and not worth worrying about. - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': removal_forest_type_window = np.where(model_extent_window != 0, cn.old_natural_rank, removal_forest_type_window).astype('uint8') @@ -212,7 +222,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(ipcc_AGB_default_stdev_window != 0, ipcc_AGB_default_stdev_window * cn.biomass_to_c_non_mangrove, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: # young_AGC_rate_window uses > because of the weird NaN in the tiles. If != is used, the young rate NaN overwrites the IPCC arrays @@ -234,10 +244,10 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens young_AGC_stdev_window, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass - if sensit_type != 'US_removals': + if cn.SENSIT_TYPE != 'US_removals': try: us_AGC_BGC_rate_window = us_AGC_BGC_src.read(1, window=window) us_AGC_BGC_stdev_window = us_AGC_BGC_stdev_src.read(1, window=window) @@ -252,7 +262,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(us_AGC_BGC_stdev_window != 0, us_AGC_BGC_stdev_window / (1 + cn.below_to_above_non_mang), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: @@ -269,7 +279,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(plantations_AGC_BGC_stdev_window != 0, plantations_AGC_BGC_stdev_window / (1 + cn.below_to_above_non_mang), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: @@ -289,7 +299,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(europe_AGC_BGC_stdev_window != 0, (europe_AGC_BGC_stdev_window/2) / (1 + cn.below_to_above_non_mang), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass # Highest priority @@ -307,7 +317,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(mangroves_AGB_stdev_window != 0, mangroves_AGB_stdev_window * cn.biomass_to_c_mangrove, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass # Masks outputs to model output extent @@ -325,4 +335,4 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst.write_band(1, stdev_annual_gain_AGC_all_forest_types_window, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, cn.pattern_removal_forest_type, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, cn.pattern_removal_forest_type) diff --git a/removals/annual_gain_rate_IPCC_defaults.py b/removals/annual_gain_rate_IPCC_defaults.py index 58676f67..99f7e3b4 100644 --- a/removals/annual_gain_rate_IPCC_defaults.py +++ 
b/removals/annual_gain_rate_IPCC_defaults.py @@ -1,8 +1,12 @@ +""" +Function to create removal factor tiles according to IPCC defaults +""" + import datetime import numpy as np import rasterio -import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu @@ -10,7 +14,15 @@ # Necessary to suppress a pandas error later on. https://github.com/numpy/numpy/issues/12987 np.set_printoptions(threshold=sys.maxsize) -def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, output_pattern_list, no_upload): +def annual_gain_rate(tile_id, gain_table_dict, stdev_table_dict, output_pattern_list): +    """ +    :param tile_id: tile to be processed, identified by its tile id +    :param gain_table_dict: dictionary of removal factors by continent, ecozone, and age +    :param stdev_table_dict: dictionary of standard deviations for removal factors by continent, ecozone, and age +    :param output_pattern_list: patterns for output tile names +    :return: 3 tiles: aboveground rate, belowground rate, standard deviation for aboveground rate (IPCC rates) +    Units: Mg biomass/ha/yr (including for standard deviation tiles) +    """      # Converts the forest age category decision tree output values to the three age categories-- # 10000: primary forest; 20000: secondary forest > 20 years; 30000: secondary forest <= 20 years @@ -19,32 +31,32 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou # The key in the dictionary is the forest age category decision tree endpoints.     age_dict = {0: 0, 1: 10000, 2: 20000, 3: 30000}  -    uu.print_log("Creating IPCC default biomass removals rates and standard deviation for {}".format(tile_id)) +    uu.print_log(f'Creating IPCC default biomass removals rates and standard deviation for {tile_id}')      # Start time     start = datetime.datetime.now()      # Names of the forest age category and continent-ecozone tiles -    age_cat = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_age_cat_IPCC) -    cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) +    age_cat = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_age_cat_IPCC) +    cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed)      # Names of the output natural forest removals rate tiles (above and belowground) -    AGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) -    BGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) -    AGB_IPCC_default_gain_stdev = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2]) +    AGB_IPCC_default_gain_rate = f'{tile_id}_{output_pattern_list[0]}.tif' +    BGB_IPCC_default_gain_rate = f'{tile_id}_{output_pattern_list[1]}.tif' +    AGB_IPCC_default_gain_stdev = f'{tile_id}_{output_pattern_list[2]}.tif'      # Opens the input tiles if they exist. Skips tile if either input doesn't exist.     try:         age_cat_src = rasterio.open(age_cat) -        uu.print_log("   Age category tile found for {}".format(tile_id)) -    except: -        return uu.print_log("   No age category tile found for {}. Skipping tile.".format(tile_id)) +        uu.print_log(f'   Age category tile found for {tile_id}') +    except rasterio.errors.RasterioIOError: +        return uu.print_log(f'   No age category tile found for {tile_id}. Skipping tile.')     try:         cont_eco_src = rasterio.open(cont_eco) -        uu.print_log("   Continent-ecozone tile found for {}".format(tile_id)) -    except: -        return uu.print_log("   No continent-ecozone tile found for {}. 
Skipping tile.".format(tile_id)) +        uu.print_log(f'   Continent-ecozone tile found for {tile_id}') +    except rasterio.errors.RasterioIOError: +        return uu.print_log(f'   No continent-ecozone tile found for {tile_id}. Skipping tile.')      # Grabs metadata about the continent ecozone tile, like its location/projection/cellsize     kwargs = cont_eco_src.meta @@ -65,7 +77,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou     # The output files, aboveground and belowground biomass removals rates     dst_above = rasterio.open(AGB_IPCC_default_gain_rate, 'w', **kwargs)     # Adds metadata tags to the output raster -    uu.add_rasterio_tags(dst_above, sensit_type) +    uu.add_universal_metadata_rasterio(dst_above)     dst_above.update_tags(         units='megagrams aboveground biomass (AGB or dry matter)/ha/yr')     dst_above.update_tags( @@ -75,7 +87,7 @@     dst_below = rasterio.open(BGB_IPCC_default_gain_rate, 'w', **kwargs)     # Adds metadata tags to the output raster -    uu.add_rasterio_tags(dst_below, sensit_type) +    uu.add_universal_metadata_rasterio(dst_below)     dst_below.update_tags(         units='megagrams belowground biomass (BGB or dry matter)/ha/yr')     dst_below.update_tags( @@ -85,7 +97,7 @@     dst_stdev_above = rasterio.open(AGB_IPCC_default_gain_stdev, 'w', **kwargs)     # Adds metadata tags to the output raster -    uu.add_rasterio_tags(dst_stdev_above, sensit_type) +    uu.add_universal_metadata_rasterio(dst_stdev_above)     dst_stdev_above.update_tags(         units='standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr')     dst_stdev_above.update_tags( @@ -101,12 +113,12 @@     # Creates a processing window for each input raster     try:         cont_eco_window = cont_eco_src.read(1, window=window) -    except: +    except UnboundLocalError:         cont_eco_window = np.zeros((window.height, window.width), dtype='uint8')      try:         age_cat_window = age_cat_src.read(1, window=window) -    except: +    except UnboundLocalError:         age_cat_window = np.zeros((window.height, window.width), dtype='uint8')      # Recodes the input forest age category array with 10 different decision tree end values into the 3 actual age categories @@ -147,4 +159,4 @@     dst_stdev_above.write_band(1, gain_stdev_AGB, window=window)      # Prints information about the tile that was just processed -    uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload) +    uu.end_of_fx_summary(start, tile_id, output_pattern_list[0]) diff --git a/removals/annual_gain_rate_mangrove.py b/removals/annual_gain_rate_mangrove.py index 306ba6e4..851fe7a2 100644 --- a/removals/annual_gain_rate_mangrove.py +++ b/removals/annual_gain_rate_mangrove.py @@ -14,7 +14,7 @@  # Necessary to suppress a pandas error later on. 
https://github.com/numpy/numpy/issues/12987 np.set_printoptions(threshold=sys.maxsize) -def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict): +def annual_gain_rate(tile_id, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict): uu.print_log("Processing:", tile_id) @@ -29,8 +29,8 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, return # Name of the input files - mangrove_biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) + mangrove_biomass = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) # Names of the output aboveground and belowground mangrove removals rate tiles AGB_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) @@ -60,7 +60,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_above = rasterio.open(AGB_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_above) dst_above.update_tags( units='megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_above.update_tags( @@ -70,7 +70,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_below = rasterio.open(BGB_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_below, sensit_type) + uu.add_universal_metadata_rasterio(dst_below) dst_below.update_tags( units='megagrams belowground biomass (BGB or dry matter)/ha/yr') dst_below.update_tags( @@ -80,7 +80,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_stdev_above = rasterio.open(AGB_gain_stdev, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_stdev_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_stdev_above) dst_stdev_above.update_tags( units='standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_stdev_above.update_tags( diff --git a/removals/forest_age_category_IPCC.py b/removals/forest_age_category_IPCC.py index df4a40e0..468b86c4 100644 --- a/removals/forest_age_category_IPCC.py +++ b/removals/forest_age_category_IPCC.py @@ -1,14 +1,22 @@ +""" +Function to create forest age category tiles +""" + import datetime import numpy as np -import os import rasterio -import logging import sys sys.path.append('../') import constants_and_names as cn import universal_util as uu -def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_upload): +def forest_age_category(tile_id, gain_table_dict, pattern): + """ + :param tile_id: tile to be processed, identified by its tile id + :param gain_table_dict: dictionary of removal factors by continent, ecozone, and forest age category + :param pattern: pattern for output tile names + :return: tile denoting three broad forest age categories: 1- young (<20), 2- middle, 3- old/primary + """ uu.print_log("Assigning forest age categories:", tile_id) @@ -26,30 +34,30 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa tropics = 1 - uu.print_log(" Tile {} in tropics:".format(tile_id), tropics) + uu.print_log(f' Tile {tile_id} in tropics: {tropics}') - # Names of the input tiles - gain = 
'{0}_{1}.tif'.format(cn.pattern_gain, tile_id) -    model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) -    ifl_primary = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_ifl_primary) -    cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) +    # Names of the input tiles +    gain = f'{cn.pattern_gain}_{tile_id}.tif' +    model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) +    ifl_primary = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_ifl_primary) +    cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed)      # Biomass tile name depends on the sensitivity analysis -    if sensit_type == 'biomass_swap': -        biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) -        uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) +    if cn.SENSIT_TYPE == 'biomass_swap': +        biomass = f'{tile_id}_{cn.pattern_JPL_unmasked_processed}.tif' +        uu.print_log(f'Using JPL biomass tile for {cn.SENSIT_TYPE} sensitivity analysis')     else: -        biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) -        uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) - -    if sensit_type == 'legal_Amazon_loss': -        loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) -        uu.print_log("Using PRODES loss tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) -    elif sensit_type == 'Mekong_loss': -        loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed) +        biomass = f'{tile_id}_{cn.pattern_WHRC_biomass_2000_unmasked}.tif' +        uu.print_log(f'Using WHRC biomass tile for {cn.SENSIT_TYPE} sensitivity analysis') + +    if cn.SENSIT_TYPE == 'legal_Amazon_loss': +        loss = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' +        uu.print_log(f'Using PRODES loss tile {tile_id} for {cn.SENSIT_TYPE} sensitivity analysis') +    elif cn.SENSIT_TYPE == 'Mekong_loss': +        loss = f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif'     else: -        loss = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) -        uu.print_log("Using Hansen loss tile {0} for {1} model run".format(tile_id, sensit_type)) +        loss = f'{cn.pattern_loss}_{tile_id}.tif' +        uu.print_log(f'Using Hansen loss tile {tile_id} for {cn.SENSIT_TYPE} model run')      # Opens the model extent tile     with rasterio.open(model_extent) as model_extent_src: @@ -63,33 +71,33 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa     # Opens the input tiles if they exist     try:         cont_eco_src = rasterio.open(cont_eco) -        uu.print_log("   Continent-ecozone tile found for {}".format(tile_id)) -    except: -        uu.print_log("   No continent-ecozone tile found for {}".format(tile_id)) +        uu.print_log(f'   Continent-ecozone tile found for {tile_id}') +    except rasterio.errors.RasterioIOError: +        uu.print_log(f'   No continent-ecozone tile found for {tile_id}')      try:         gain_src = rasterio.open(gain) -        uu.print_log("   Gain tile found for {}".format(tile_id)) -    except: -        uu.print_log("   No gain tile found for {}".format(tile_id)) +        uu.print_log(f'   Gain tile found for {tile_id}') +    except rasterio.errors.RasterioIOError: +        uu.print_log(f'   No gain tile found for {tile_id}')      try:         biomass_src = rasterio.open(biomass) -        uu.print_log("   Biomass tile found for {}".format(tile_id)) -    except: -        uu.print_log("   No biomass tile found for {}".format(tile_id)) +        uu.print_log(f'   Biomass tile found for {tile_id}') +    except rasterio.errors.RasterioIOError: +        uu.print_log(f'   No biomass tile found 
for {tile_id}') try: loss_src = rasterio.open(loss) - uu.print_log(" Loss tile found for {}".format(tile_id)) - except: - uu.print_log(" No loss tile found for {}".format(tile_id)) + uu.print_log(f' Loss tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No loss tile found for {tile_id}') try: ifl_primary_src = rasterio.open(ifl_primary) - uu.print_log(" IFL-primary forest tile found for {}".format(tile_id)) - except: - uu.print_log(" No IFL-primary forest tile found for {}".format(tile_id)) + uu.print_log(f' IFL-primary forest tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' No IFL-primary forest tile found for {tile_id}') # Updates kwargs for the output dataset kwargs.update( @@ -100,10 +108,10 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa ) # Opens the output tile, giving it the arguments of the input tiles - dst = rasterio.open('{0}_{1}.tif'.format(tile_id, pattern), 'w', **kwargs) + dst = rasterio.open(f'{tile_id}_{pattern}.tif', 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) + uu.add_universal_metadata_rasterio(dst) dst.update_tags( key='1: young (<20 year) secondary forest; 2: old (>20 year) secondary forest; 3: primary forest or IFL') dst.update_tags( @@ -111,8 +119,7 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa dst.update_tags( extent='Full model extent, even though these age categories will not be used over the full model extent. They apply to just the rates from IPCC defaults.') - - uu.print_log(" Assigning IPCC age categories for", tile_id) + uu.print_log(f' Assigning IPCC age categories for {tile_id}') uu.check_memory() @@ -124,27 +131,27 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa try: loss_window = loss_src.read(1, window=window) - except: + except UnboundLocalError: loss_window = np.zeros((window.height, window.width), dtype='uint8') try: gain_window = gain_src.read(1, window=window) - except: + except UnboundLocalError: gain_window = np.zeros((window.height, window.width), dtype='uint8') try: cont_eco_window = cont_eco_src.read(1, window=window) - except: + except UnboundLocalError: cont_eco_window = np.zeros((window.height, window.width), dtype='uint8') try: biomass_window = biomass_src.read(1, window=window) - except: + except UnboundLocalError: biomass_window = np.zeros((window.height, window.width), dtype='float32') try: ifl_primary_window = ifl_primary_src.read(1, window=window) - except: + except UnboundLocalError: ifl_primary_window = np.zeros((window.height, window.width), dtype='uint8') # Creates a numpy array that has the <=20 year secondary forest growth rate x 20 @@ -162,7 +169,7 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa # For every model version except legal_Amazon_loss sensitivity analysis, which has its own rules about age assignment - if sensit_type != 'legal_Amazon_loss': + if cn.SENSIT_TYPE != 'legal_Amazon_loss': # No change pixels- no loss or gain if tropics == 0: @@ -208,4 +215,4 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa dst.write_band(1, dst_data, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/removals/gain_year_count_all_forest_types.py 
b/removals/gain_year_count_all_forest_types.py index 847cbf4d..c85f8475 100644 --- a/removals/gain_year_count_all_forest_types.py +++ b/removals/gain_year_count_all_forest_types.py @@ -1,34 +1,46 @@ -from subprocess import Popen, PIPE, STDOUT, check_call +""" +Functions to create tiles with the number of years of carbon accumulation +""" + import datetime -import rasterio import numpy as np +import rasterio import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu -# Gets the names of the input tiles -def tile_names(tile_id, sensit_type): +def tile_names(tile_id): + """ + Gets the names of the input tiles + :param tile_id: tile to be processed, identified by its tile id + :return: names of input tiles + """ # Names of the loss, gain, and model extent tiles - if sensit_type == 'legal_Amazon_loss': - loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + loss = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' else: - loss = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) + loss = f'{cn.pattern_loss}_{tile_id}.tif' + gain = f'{cn.pattern_gain}_{tile_id}.tif' + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) return loss, gain, model_extent -# Creates gain year count tiles for pixels that only had loss -def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_only(tile_id): + """ + Creates gain year count tiles for pixels that only had loss + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that only had tree cover loss + """ - uu.print_log("Gain year count for loss only pixels:", tile_id) + uu.print_log(f'Gain year count for loss only pixels: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -36,27 +48,31 @@ def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload): uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss only pixel gain year count.".format(tile_id)) + uu.print_log(f' Loss tile found for {tile_id}. Using it in loss only pixel gain year count.') loss_calc = '--calc=(A>0)*(B==0)*(C>0)*(A-1)' - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) - loss_outfilearg = '--outfile={}'.format(loss_outfilename) + loss_outfilename = f'{tile_id}_gain_year_count_loss_only.tif' + loss_outfilearg = f'--outfile={loss_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log("No loss tile found for {}. Skipping loss only pixel gain year count.".format(tile_id)) + uu.print_log(f'No loss tile found for {tile_id}. 
Skipping loss only pixel gain year count.')      # Prints information about the tile that was just processed -    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only', no_upload) +    uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_only')   -# Creates gain year count tiles for pixels that only had gain -def create_gain_year_count_gain_only_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_gain_only_standard(tile_id): +    """ +    Creates gain year count tiles for pixels that only had gain (standard model only) +    :param tile_id: tile to be processed, identified by its tile id +    :return: tile with number of years of carbon accumulation in pixels that only had tree cover gain +    """  -    uu.print_log("Gain year count for gain only pixels using standard function:", tile_id) +    uu.print_log(f'Gain year count for gain only pixels using standard function: {tile_id}')      # Names of the loss, gain and tree cover density tiles -    loss, gain, model_extent = tile_names(tile_id, sensit_type) +    loss, gain, model_extent = tile_names(tile_id)      # start time     start = datetime.datetime.now() @@ -65,33 +81,37 @@ def create_gain_year_count_gain_only_standard(tile_id, sensit_type, no_upload):      # Need to check if loss tile exists because the calc string depends on the presence/absence of the loss tile     if os.path.exists(loss): -        uu.print_log("    Loss tile found for {}. Using it in gain only pixel gain year count.".format(tile_id)) -        gain_calc = '--calc=(A==0)*(B==1)*(C>0)*({}/2)'.format(cn.gain_years) -        gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) -        gain_outfilearg = '--outfile={}'.format(gain_outfilename) +        uu.print_log(f'    Loss tile found for {tile_id}. Using it in gain only pixel gain year count.') +        gain_calc = f'--calc=(A==0)*(B==1)*(C>0)*({cn.gain_years}/2)' +        gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' +        gain_outfilearg = f'--outfile={gain_outfilename}'         cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg,                '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet']         uu.log_subprocess_output_full(cmd)      else: -        uu.print_log("    No loss tile found for {}. 
Not using it for gain only pixel gain year count.') + gain_calc = f'--calc=(A==1)*(B>0)*({cn.gain_years}/2)' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_gain_only', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_gain_only') -# Creates gain year count tiles for pixels that only had gain -def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): +def create_gain_year_count_gain_only_maxgain(tile_id): + """ + Creates gain year count tiles for pixels that only had gain (maximum gain year sensitivity analysis only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that only had tree cover gain + """ - uu.print_log("Gain year count for gain only pixels using maxgain function:", tile_id) + uu.print_log(f'Gain year count for gain only pixels using maxgain function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -99,34 +119,38 @@ def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==0)*(B==1)*(C>0)*({})'.format(cn.loss_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + uu.print_log(f' Loss tile found for {tile_id}. Using it in gain only pixel gain year count.') + gain_calc = f'--calc=(A==0)*(B==1)*(C>0)*({cn.loss_years})' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}. Not using loss for gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==1)*(B>0)*({})'.format(cn.loss_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + uu.print_log(f' No loss tile found for {tile_id}. Not using loss for gain only pixel gain year count.') + gain_calc = f'--calc=(A==1)*(B>0)*({cn.loss_years})' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_gain_only', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_gain_only') -# Creates gain year count tiles for pixels that had neither loss not gain. 
-# For all models except legal_Amazon_loss. -def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_no_change_standard(tile_id): +    """ +    Creates gain year count tiles for pixels that had neither loss nor gain. +    For all models except legal_Amazon_loss. +    :param tile_id: tile to be processed, identified by its tile id +    :return: tile with number of years of carbon accumulation in pixels that had neither loss nor gain +    """      uu.print_log("Gain year count for pixels with neither loss nor gain:", tile_id)      # Names of the loss, gain and tree cover density tiles -    loss, gain, model_extent = tile_names(tile_id, sensit_type) +    loss, gain, model_extent = tile_names(tile_id)      # start time     start = datetime.datetime.now() @@ -134,34 +158,38 @@     uu.check_memory()      if os.path.exists(loss): -        uu.print_log("    Loss tile found for {}. Using it in no change pixel gain year count.".format(tile_id)) -        no_change_calc = '--calc=(A==0)*(B==0)*(C>0)*{}'.format(cn.loss_years) -        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) -        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) +        uu.print_log(f'    Loss tile found for {tile_id}. Using it in no change pixel gain year count.') +        no_change_calc = f'--calc=(A==0)*(B==0)*(C>0)*{cn.loss_years}' +        no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' +        no_change_outfilearg = f'--outfile={no_change_outfilename}'         cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, no_change_calc, no_change_outfilearg,                '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet']         uu.log_subprocess_output_full(cmd)      else: -        uu.print_log("    No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id)) -        no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years) -        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) -        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) +        uu.print_log(f'    No loss tile found for {tile_id}. Not using it for no change pixel gain year count.') +        no_change_calc = f'--calc=(A==0)*(B>0)*{cn.loss_years}' +        no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' +        no_change_outfilearg = f'--outfile={no_change_outfilename}'         cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, no_change_calc, no_change_outfilearg,                '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet']         uu.log_subprocess_output_full(cmd)      # Prints information about the tile that was just processed -    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload) +    uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change')  -# Creates gain year count tiles for pixels that did not have loss (doesn't matter if they had gain or not). -# For legal_Amazon_loss sensitivity analysis. -def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_upload): +def create_gain_year_count_no_change_legal_Amazon_loss(tile_id): +    """ +    Creates gain year count tiles for pixels that did not have loss (doesn't matter if they had gain or not). +    For legal_Amazon_loss sensitivity analysis. 
+ :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that did not have loss + """ - uu.print_log("Gain year count for pixels without loss for legal_Amazon_loss:", tile_id) + uu.print_log(f'Gain year count for pixels without loss for legal_Amazon_loss: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -171,29 +199,33 @@ def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_ # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so I have to convert it # to a vrt so that the 0 pixels are recognized. # This was the case with PRODES loss in model v.1.1.2. - loss_vrt = '{}_loss.vrt'.format(tile_id) - os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss)) + loss_vrt = f'{tile_id}_loss.vrt' + os.system(f'gdalbuildvrt -vrtnodata None {loss_vrt} {loss}') - no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) - no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) + no_change_calc = f'--calc=(A==0)*(B>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', model_extent, no_change_calc, no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) - + os.remove(loss_vrt) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change') -# Creates gain year count tiles for pixels that had both loss and gain -def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_and_gain_standard(tile_id): + """ + Creates gain year count tiles for pixels that had both loss and gain (standard model only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that had both loss and gain + """ - uu.print_log("Loss and gain pixel processing using standard function:", tile_id) + uu.print_log(f'Loss and gain pixel processing using standard function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -201,28 +233,32 @@ def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type, no_uploa uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss and gain pixel gain year count.".format(tile_id)) - loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({}+1-A)/2)))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) - loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) + uu.print_log(f' Loss tile found for {tile_id}. 
Using it in loss and gain pixel gain year count.') + loss_and_gain_calc = f'--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({cn.loss_years}+1-A)/2)))' + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' + loss_and_gain_outfilearg = f'--outfile={loss_and_gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id)) + uu.print_log(f' No loss tile found for {tile_id}. Skipping loss and gain pixel gain year count.') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') -# Creates gain year count tiles for pixels that had both loss and gain -def create_gain_year_count_loss_and_gain_maxgain(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_and_gain_maxgain(tile_id): + """ + Creates gain year count tiles for pixels that had both loss and gain (maxgain sensitivity model only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that had both loss and gain + """ - uu.print_log("Loss and gain pixel processing using maxgain function:", tile_id) + uu.print_log(f'Loss and gain pixel processing using maxgain function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -230,36 +266,41 @@ def create_gain_year_count_loss_and_gain_maxgain(tile_id, sensit_type, no_upload uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss and gain pixel gain year count".format(tile_id)) - loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*({}-1))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) - loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) + uu.print_log(f' Loss tile found for {tile_id}. Using it in loss and gain pixel gain year count') + loss_and_gain_calc = f'--calc=((A>0)*(B==1)*(C>0)*({cn.loss_years}-1))' + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' + loss_and_gain_outfilearg = f'--outfile={loss_and_gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id)) + uu.print_log(f' No loss tile found for {tile_id}. 
Skipping loss and gain pixel gain year count.') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') -# Merges the four gain year count tiles above to create a single gain year count tile -def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): +def create_gain_year_count_merge(tile_id, pattern): + """ + Merges the four gain year count tiles above to create a single gain year count tile + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: tile with number of years of carbon accumulation in all pixels + """ - uu.print_log("Merging loss, gain, no change, and loss/gain pixels into single gain year count raster for {}".format(tile_id)) + uu.print_log(f'Merging loss, gain, no change, and loss/gain pixels into single gain year count raster for {tile_id}') # start time start = datetime.datetime.now() # The four rasters from above that are to be merged - no_change_gain_years = '{}_growth_years_no_change.tif'.format(tile_id) - loss_only_gain_years = '{}_growth_years_loss_only.tif'.format(tile_id) - gain_only_gain_years = '{}_growth_years_gain_only.tif'.format(tile_id) - loss_and_gain_gain_years = '{}_growth_years_loss_and_gain.tif'.format(tile_id) + no_change_gain_years = f'{tile_id}_gain_year_count_no_change.tif' + loss_only_gain_years = f'{tile_id}_gain_year_count_loss_only.tif' + gain_only_gain_years = f'{tile_id}_gain_year_count_gain_only.tif' + loss_and_gain_gain_years = f'{tile_id}_gain_year_count_loss_and_gain.tif' # Names of the output tiles - gain_year_count_merged = '{0}_{1}.tif'.format(tile_id, pattern) + gain_year_count_merged = f'{tile_id}_{pattern}.tif' # Opens no change gain year count tile. This should exist for all tiles. with rasterio.open(no_change_gain_years) as no_change_gain_years_src: @@ -278,32 +319,32 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): nodata=0 ) - uu.print_log(" No change tile exists for {} by default".format(tile_id)) + uu.print_log(f' No change tile exists for {tile_id} by default') # Opens the other gain year count tiles. They may not exist for all other tiles. 
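The merge step that follows opens each component raster only if it exists and substitutes zeros for the missing ones while summing window by window, which keeps memory use bounded by one block at a time. A self-contained sketch of that pattern; merge_sum is a hypothetical condensation of the logic below, not the repo's API:

```python
import rasterio

def merge_sum(out_path, base_path, optional_paths):
    """Sum rasters window by window; rasters missing on disk count as zero."""
    srcs = []
    for path in optional_paths:
        try:
            srcs.append(rasterio.open(path))
        except rasterio.errors.RasterioIOError:
            pass  # raster was never created for this tile, so it contributes zeros
    with rasterio.open(base_path) as base:
        kwargs = base.meta.copy()
        kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0)
        with rasterio.open(out_path, 'w', **kwargs) as dst:
            # block_windows iterates the raster's internal tiling, so only
            # one window's worth of pixels is held in memory at a time
            for _, window in base.block_windows(1):
                total = base.read(1, window=window)
                for src in srcs:
                    total = total + src.read(1, window=window)
                dst.write_band(1, total, window=window)
    for src in srcs:
        src.close()
```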
    try:
        loss_only_gain_years_src = rasterio.open(loss_only_gain_years)
-        uu.print_log("  Loss only tile found for {}".format(tile_id))
-    except:
-        uu.print_log("  No loss only tile found for {}".format(tile_id))
+        uu.print_log(f'  Loss only tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No loss only tile found for {tile_id}')
 
     try:
         gain_only_gain_years_src = rasterio.open(gain_only_gain_years)
-        uu.print_log("  Gain only tile found for {}".format(tile_id))
-    except:
-        uu.print_log("  No gain only tile found for {}".format(tile_id))
+        uu.print_log(f'  Gain only tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No gain only tile found for {tile_id}')
 
     try:
         loss_and_gain_gain_years_src = rasterio.open(loss_and_gain_gain_years)
-        uu.print_log("  Loss and gain tile found for {}".format(tile_id))
-    except:
-        uu.print_log("  No loss and gain tile found for {}".format(tile_id))
+        uu.print_log(f'  Loss and gain tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No loss and gain tile found for {tile_id}')
 
     # Opens the output tile, giving it the arguments of the input tiles
     gain_year_count_merged_dst = rasterio.open(gain_year_count_merged, 'w', **kwargs)
 
     # Adds metadata tags to the output raster
-    uu.add_rasterio_tags(gain_year_count_merged_dst, sensit_type)
+    uu.add_universal_metadata_rasterio(gain_year_count_merged_dst)
     gain_year_count_merged_dst.update_tags(
         units='years')
     gain_year_count_merged_dst.update_tags(
@@ -324,17 +365,17 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload):
 
         try:
             loss_only_gain_years_window = loss_only_gain_years_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             loss_only_gain_years_window = np.zeros((window.height, window.width), dtype='uint8')
 
         try:
             gain_only_gain_years_window = gain_only_gain_years_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             gain_only_gain_years_window = np.zeros((window.height, window.width), dtype='uint8')
 
         try:
             loss_and_gain_gain_years_window = loss_and_gain_gain_years_src.read(1, window=window)
-        except:
+        except UnboundLocalError:
             loss_and_gain_gain_years_window = np.zeros((window.height, window.width), dtype='uint8')
 
 
@@ -344,4 +385,4 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload):
         gain_year_count_merged_dst.write_band(1, gain_year_count_merged_window, window=window)
 
     # Prints information about the tile that was just processed
-    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
\ No newline at end of file
+    uu.end_of_fx_summary(start, tile_id, pattern)
diff --git a/removals/gross_removals_all_forest_types.py b/removals/gross_removals_all_forest_types.py
index 2c0b3eff..ecc279b6 100644
--- a/removals/gross_removals_all_forest_types.py
+++ b/removals/gross_removals_all_forest_types.py
@@ -1,48 +1,55 @@
+"""
+Function to create gross removals tiles
+"""
+
 import datetime
 import rasterio
-from subprocess import Popen, PIPE, STDOUT, check_call
+
 import sys
 sys.path.append('../')
 import constants_and_names as cn
 import universal_util as uu
 
 
-# Calculates cumulative aboveground carbon dioxide removals in mangroves
-def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload):
+def gross_removals_all_forest_types(tile_id, output_pattern_list):
+    """
+    Calculates cumulative aboveground and belowground carbon dioxide removals for all forest types
+    :param tile_id: tile to be processed, identified by its tile id
+    :param output_pattern_list: patterns for output tile names
+    :return: 3 tiles: gross aboveground removals, belowground removals, aboveground+belowground removals
+    Units: Mg CO2/ha over entire model period.
+    """
 
-    uu.print_log("Calculating cumulative CO2 removals:", tile_id)
+    uu.print_log(f'Calculating cumulative CO2 removals: {tile_id}')
 
     # Start time
     start = datetime.datetime.now()
 
     # Names of the input tiles, modified according to sensitivity analysis
-    gain_rate_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_all_types)
-    gain_rate_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_BGC_all_types)
-    gain_year_count = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_gain_year_count)
+    gain_rate_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_all_types)
+    gain_rate_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_BGC_all_types)
+    gain_year_count = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_gain_year_count)
 
     # Names of the output removal tiles
-    cumulative_gain_AGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0])
-    cumulative_gain_BGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1])
-    cumulative_gain_AGCO2_BGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2])
+    cumulative_gain_AGCO2 = f'{tile_id}_{output_pattern_list[0]}.tif'
+    cumulative_gain_BGCO2 = f'{tile_id}_{output_pattern_list[1]}.tif'
+    cumulative_gain_AGCO2_BGCO2 = f'{tile_id}_{output_pattern_list[2]}.tif'
 
     # Opens the input tiles if they exist. If one of the inputs doesn't exist, the function exits without creating gross removals.
     try:
         gain_rate_AGC_src = rasterio.open(gain_rate_AGC)
-        uu.print_log("  Aboveground removal factor tile found for", tile_id)
-    except:
-        uu.print_log("  No aboveground removal factor tile found for {}. Not creating gross removals.".format(tile_id))
-        return
+        uu.print_log(f'  Aboveground removal factor tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No aboveground removal factor tile found for {tile_id}. Not creating gross removals.')
+        return
     try:
         gain_rate_BGC_src = rasterio.open(gain_rate_BGC)
-        uu.print_log("  Belowground removal factor tile found for", tile_id)
-    except:
-        uu.print_log("  No belowground removal factor tile found for {}. Not creating gross removals.".format(tile_id))
-        return
+        uu.print_log(f'  Belowground removal factor tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No belowground removal factor tile found for {tile_id}. Not creating gross removals.')
+        return
     try:
         gain_year_count_src = rasterio.open(gain_year_count)
-        uu.print_log("  Gain year count tile found for", tile_id)
-    except:
-        uu.print_log("  No gain year count tile found for {}. Not creating gross removals.".format(tile_id))
-        return
+        uu.print_log(f'  Gain year count tile found for {tile_id}')
+    except rasterio.errors.RasterioIOError:
+        uu.print_log(f'  No gain year count tile found for {tile_id}. Not creating gross removals.')
+        return
 
 
     # Grabs metadata for an input tile
@@ -61,7 +68,7 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n
     # The output files: aboveground gross removals, belowground gross removals, above+belowground gross removals.
Adds metadata tags cumulative_gain_AGCO2_dst = rasterio.open(cumulative_gain_AGCO2, 'w', **kwargs) - uu.add_rasterio_tags(cumulative_gain_AGCO2_dst, sensit_type) + uu.add_universal_metadata_rasterio(cumulative_gain_AGCO2_dst) cumulative_gain_AGCO2_dst.update_tags( units='megagrams aboveground CO2/ha over entire model period') cumulative_gain_AGCO2_dst.update_tags( @@ -70,7 +77,7 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n extent='Full model extent') cumulative_gain_BGCO2_dst = rasterio.open(cumulative_gain_BGCO2, 'w', **kwargs) - uu.add_rasterio_tags(cumulative_gain_BGCO2_dst, sensit_type) + uu.add_universal_metadata_rasterio(cumulative_gain_BGCO2_dst) cumulative_gain_BGCO2_dst.update_tags( units='megagrams belowground CO2/ha over entire model period') cumulative_gain_BGCO2_dst.update_tags( @@ -108,4 +115,4 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload) + uu.end_of_fx_summary(start, tile_id, output_pattern_list[0]) diff --git a/removals/mp_US_removal_rates.py b/removals/mp_US_removal_rates.py index 4c445da0..0af26e2e 100644 --- a/removals/mp_US_removal_rates.py +++ b/removals/mp_US_removal_rates.py @@ -50,7 +50,7 @@ import constants_and_names as cn import universal_util as uu -def mp_US_removal_rates(sensit_type, tile_id_list, run_date): +def mp_US_removal_rates(tile_id_list): os.chdir(cn.docker_base_dir) @@ -59,7 +59,7 @@ def mp_US_removal_rates(sensit_type, tile_id_list, run_date): tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script download_dict = {cn.gain_dir: [cn.pattern_gain], @@ -77,19 +77,19 @@ def mp_US_removal_rates(sensit_type, tile_id_list, run_date): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. 
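A sketch of roughly what the date swap in uu.replace_output_dir_date amounts to, assuming each output directory ends in a YYYYMMDD folder; the bucket path is made up for illustration:

```python
import re

def replace_output_dir_date(output_dir_list, run_date):
    # Swap the trailing YYYYMMDD component of each s3 output path for the
    # run date supplied on the command line (hypothetical reimplementation;
    # the real helper lives in universal_util)
    return [re.sub(r'\d{8}(?=/?$)', run_date, d) for d in output_dir_list]

print(replace_output_dir_date(['s3://bucket/gross_removals/20229999/'], '20221031'))
# ['s3://bucket/gross_removals/20221031/']
```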
- if run_date is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Table with US-specific removal rates @@ -216,25 +216,31 @@ def mp_US_removal_rates(sensit_type, tile_id_list, run_date): parser = argparse.ArgumentParser( description='Create tiles of removal factors for the US using US rates') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_US_removal_rates(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file + mp_US_removal_rates(tile_id_list) \ No newline at end of file diff --git a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py index 55378085..a5a8229d 100644 --- a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py @@ -1,4 +1,4 @@ -''' +""" Creates tiles of annual aboveground and belowground removal rates for the entire model extent (all forest types). Also, creates tiles that show what the source of the removal factor is each for each pixel. This can correspond to particular forest types (mangrove, planted, natural) or data sources (US, Europe, young natural forests from Cook-Patton et al., @@ -7,34 +7,39 @@ rates for young secondary forests > IPCC defaults for old secondary and primary forests. This hierarchy is reflected in the removal rates and the forest type rasters. The different removal rate inputs are in different units but all are standardized to AGC/ha/yr and BGC/ha/yr. 
-''' +""" -import multiprocessing -from functools import partial -import pandas as pd -import datetime import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +from functools import partial +import multiprocessing import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu sys.path.append(os.path.join(cn.docker_app,'removals')) import annual_gain_rate_AGC_BGC_all_forest_types -def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 5 sets of tiles with annual removal factors combined from all removal factor sources: + removal forest type, aboveground rate, belowground rate, aboveground+belowground rate, + standard deviation for aboveground rate. + Units: Mg carbon/ha/yr (including for standard deviation tiles) + """ os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. @@ -69,69 +74,68 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
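The pool.map(partial(...)) calls below follow the repo-wide pattern noted in the comments: pool.map passes exactly one varying argument per item, so the fixed arguments are bound with functools.partial and the iterable supplies the tile id. A runnable sketch with a stand-in worker and a made-up output pattern:

```python
import multiprocessing
from functools import partial

def process_tile(tile_id, output_pattern_list):
    # Stand-in for the per-tile worker functions in this patch
    print(f'{tile_id} -> {tile_id}_{output_pattern_list[0]}.tif')

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E']
    # The fixed argument is bound by keyword; each list element becomes
    # the first positional argument (tile_id) of one worker call.
    with multiprocessing.Pool(2) as pool:
        pool.map(partial(process_tile, output_pattern_list=['gross_removals_AGCO2']),
                 tile_id_list)
```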
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 13 else: processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak else: processes = 2 - uu.print_log('Removal factor processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, - output_pattern_list=output_pattern_list, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Removal factor processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type, no_upload) + # annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id) # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( - output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: processes = 55 # 50 processors = XXX GB peak - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -141,7 +145,7 @@ def 
mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ parser = argparse.ArgumentParser( description='Create tiles of removal factors for all forest types') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -149,22 +153,24 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, - run_date=run_date, no_upload=no_upload) - + mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list) diff --git a/removals/mp_annual_gain_rate_IPCC_defaults.py b/removals/mp_annual_gain_rate_IPCC_defaults.py index dc33b6f6..ce4bef3a 100644 --- a/removals/mp_annual_gain_rate_IPCC_defaults.py +++ b/removals/mp_annual_gain_rate_IPCC_defaults.py @@ -1,6 +1,4 @@ -''' -This script assigns annual aboveground and belowground removal rates for the full model extent according to IPCC Table 4.9 defaults -(in the units of IPCC Table 4.9 (currently tonnes biomass/ha/yr)) to the entire model extent. +""" It also creates assigns aboveground removal rate standard deviations for the full model extent according to IPCC Table 4.9 defaults (in the units of IPCC Table 4.9 (currently tonnes biomass/ha/yr)) to the entire model extent. The standard deviation tiles are used in the uncertainty analysis. @@ -13,16 +11,15 @@ everywhere there's a forest age category, continent, and ecozone. You can think of this as the IPCC default rate that would be applied if no other data were available for that pixel. The belowground removal rates are purely the aboveground removal rates with the above:below ratio applied to them. 
-''' +""" import multiprocessing from functools import partial import argparse import pandas as pd -import datetime -from subprocess import Popen, PIPE, STDOUT, check_call import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu @@ -31,7 +28,13 @@ os.chdir(cn.docker_base_dir) -def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_annual_gain_rate_IPCC_defaults(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: set of tiles with annual removal factors according to IPCC Volume 4 Table 4.9: + aboveground rate, belowground rate, standard deviation for aboveground rate. + Units: Mg biomass/ha/yr (including for standard deviation tiles) + """ os.chdir(cn.docker_base_dir) pd.options.mode.chained_assignment = None @@ -40,10 +43,10 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. @@ -59,23 +62,23 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
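The Table 4.9 rates arrive in Mg biomass/ha/yr, as the docstring above notes, and are standardized downstream to carbon and CO2 terms. A hedged sketch of the unit arithmetic; 0.47 is the IPCC default carbon fraction of dry biomass, and the model's actual conversion constants live in constants_and_names.py:

```python
BIOMASS_TO_C = 0.47   # IPCC default carbon fraction of dry biomass
C_TO_CO2 = 44 / 12    # molecular-weight ratio of CO2 to carbon

agb_rate = 4.0                          # Mg biomass/ha/yr, as in Table 4.9
agc_rate = agb_rate * BIOMASS_TO_C      # Mg C/ha/yr
agco2_rate = agc_rate * C_TO_CO2        # Mg CO2/ha/yr
print(agc_rate, round(agco2_rate, 2))   # 1.88 6.89
```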
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # Table with IPCC Table 4.9 default removals rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] @@ -86,23 +89,22 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None ### To make the removal factor dictionaries # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, no_prim_gain") - uu.print_log("Using no_primary_gain IPCC default rates for tile creation") + gain_table = pd.read_excel(cn.gain_spreadsheet, sheet_name = "natrl fores gain, no_prim_gain") + uu.print_log('Using no_primary_gain IPCC default rates for tile creation') # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, for std model") + gain_table = pd.read_excel(cn.gain_spreadsheet, sheet_name = "natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts removals table from wide to long, so each continent-ecozone-age category has its own row - gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars = ['gainEcoCon'], value_vars = ['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20']) + gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars = ['gainEcoCon'], + value_vars = ['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20']) gain_table_cont_eco_age = gain_table_cont_eco_age.dropna() # Creates a table that has just the continent-ecozone combinations for adding to the dictionary. 
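The melt above reshapes the spreadsheet from one column per age category (wide) to one row per continent-ecozone-age combination (long). An illustrative reproduction with made-up rates; in the real gain_table_dict the age category is folded into the key so codes stay unique:

```python
import pandas as pd

# Stand-in for the Table 4.9 spreadsheet: one row per continent-ecozone code
gain_table_simplified = pd.DataFrame({
    'gainEcoCon': [401, 402],
    'growth_primary': [0.9, 1.3],
    'growth_secondary_greater_20': [1.5, 2.0],
    'growth_secondary_less_20': [3.0, 4.1],
})

# Wide -> long: each continent-ecozone-age category gets its own row
gain_table_cont_eco_age = pd.melt(
    gain_table_simplified,
    id_vars=['gainEcoCon'],
    value_vars=['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20'],
).dropna()
print(gain_table_cont_eco_age)
```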
@@ -141,17 +143,15 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None ### To make the removal factor standard deviation dictionary # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon removals rates - stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name="natrl fores stdv, no_prim_gain") - uu.print_log("Using no_primary_gain IPCC default standard deviations for tile creation") + stdev_table = pd.read_excel(cn.gain_spreadsheet, sheet_name="natrl fores stdv, no_prim_gain") + uu.print_log('Using no_primary_gain IPCC default standard deviations for tile creation') # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass removals rate standard deviations - stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name="natrl fores stdv, for std model") + stdev_table = pd.read_excel(cn.gain_spreadsheet, sheet_name="natrl fores stdv, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first') @@ -197,32 +197,33 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 24 # 24 processors = 590 GB peak else: processes = 30 # 30 processors = 725 GB peak else: processes = 2 - uu.print_log('Annual removals rate natural forest max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_IPCC_defaults.annual_gain_rate, sensit_type=sensit_type, - gain_table_dict=gain_table_dict, stdev_table_dict=stdev_table_dict, - output_pattern_list=output_pattern_list, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Annual removals rate natural forest max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(annual_gain_rate_IPCC_defaults.annual_gain_rate, + gain_table_dict=gain_table_dict, stdev_table_dict=stdev_table_dict, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: # - # annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type, - # gain_table_dict, stdev_table_dict, output_pattern_list, no_upload) + # annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, + # gain_table_dict, stdev_table_dict, output_pattern_list) # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -232,7 +233,7 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None parser = argparse.ArgumentParser( description='Create 
tiles of removal factors according to IPCC defaults') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -240,20 +241,24 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_IPCC_defaults(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_IPCC_defaults(tile_id_list) diff --git a/removals/mp_annual_gain_rate_mangrove.py b/removals/mp_annual_gain_rate_mangrove.py index 035cbbab..8461fc29 100644 --- a/removals/mp_annual_gain_rate_mangrove.py +++ b/removals/mp_annual_gain_rate_mangrove.py @@ -19,7 +19,7 @@ sys.path.append(os.path.join(cn.docker_app,'removals')) import annual_gain_rate_mangrove -def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): +def mp_annual_gain_rate_mangrove(tile_id_list): os.chdir(cn.docker_base_dir) pd.options.mode.chained_assignment = None @@ -34,7 +34,7 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): tile_id_list = list(set(mangrove_biomass_tile_list).intersection(ecozone_tile_list)) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") download_dict = { @@ -49,15 +49,15 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. 
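The recurring refactor in this patch replaces per-function sensit_type/run_date/no_upload parameters with module-level attributes on constants_and_names, set once from argparse. A minimal sketch of the pattern:

```python
import argparse
import sys

sys.path.append('../')
import constants_and_names as cn  # repo module whose attributes act as globals

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-type', '-t', required=True)
    parser.add_argument('--run-date', '-d', required=False)
    parser.add_argument('--no-upload', '-nu', action='store_true')
    args = parser.parse_args()

    # Downstream functions read these module attributes instead of
    # receiving them as parameters
    cn.SENSIT_TYPE = args.model_type
    cn.RUN_DATE = args.run_date
    cn.NO_UPLOAD = args.no_upload
```

Note that this relies on fork-based multiprocessing (the Linux default) so that worker processes inherit the module state; under spawn, the attributes would reset when the module is re-imported.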
- if run_date is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates @@ -128,7 +128,7 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): processes = 4 uu.print_log('Mangrove annual removals rate max processors=', processes) pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, sensit_type=sensit_type, output_pattern_list=output_pattern_list, + pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, output_pattern_list=output_pattern_list, gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), tile_id_list) pool.close() pool.join() @@ -136,7 +136,7 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): # # For single processor use # for tile in tile_id_list: # - # annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list, + # annual_gain_rate_mangrove.annual_gain_rate(tile, output_pattern_list, # gain_above_dict, gain_below_dict, stdev_dict) @@ -154,26 +154,31 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): parser = argparse.ArgumentParser( description='Create tiles of removal factors for mangrove forests') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True - + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_mangrove(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file + mp_annual_gain_rate_mangrove(tile_id_list) \ No newline at end of file diff --git a/removals/mp_forest_age_category_IPCC.py b/removals/mp_forest_age_category_IPCC.py index c90203d9..9b8ae93e 100644 --- a/removals/mp_forest_age_category_IPCC.py +++ b/removals/mp_forest_age_category_IPCC.py @@ -1,4 +1,4 @@ -''' +""" This script creates tiles of forest age category across the entire model extent (all pixels) according to a decision tree. The age categories are: <= 20 year old secondary forest (1), >20 year old secondary forest (2), and primary forest (3). The decision tree is implemented as a series of numpy array statements rather than as nested if statements or gdal_calc operations. @@ -9,34 +9,37 @@ This assigns forest age category to all pixels within the model but they are ultimately only used for non-mangrove, non-planted, non-European, non-US, older secondary and primary forest pixels. You can think of the output from this script as being the age category if IPCC Table 4.9 rates were to be applied there. -''' +""" -import multiprocessing +import argparse from functools import partial import pandas as pd -import datetime -import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +import multiprocessing import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu sys.path.append(os.path.join(cn.docker_app,'removals')) import forest_age_category_IPCC -def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_forest_age_category_IPCC(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: set of tiles denoting three broad forest age categories: 1- young (<20), 2- middle, 3- old/primary + """ os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. 
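The docstring above describes the age-category decision tree as a series of numpy array statements rather than nested conditionals. A toy sketch of that style, with made-up inputs and categories simplified relative to the real script:

```python
import numpy as np

# Made-up inputs for a 2x2 window: loss year, gain flag, primary-forest flag
loss = np.array([[0, 4], [0, 0]])
gain = np.array([[0, 0], [1, 0]])
primary = np.array([[1, 0], [0, 0]])

# Mutually exclusive mask assignments: 1 = young secondary,
# 2 = older secondary, 3 = primary
age = np.zeros_like(loss, dtype=np.uint8)
age[primary == 1] = 3
age[(primary == 0) & (gain == 1)] = 1   # gain implies regrowth < 20 years
age[(primary == 0) & (gain == 0)] = 2   # remaining forest treated as older
print(age)  # [[3 2]
            #  [1 2]]
```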
@@ -48,15 +51,15 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u } # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Adds the correct biomass tile to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] @@ -69,22 +72,22 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Table with IPCC Table 4.9 default removals rates @@ -94,8 +97,7 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, for std model") + gain_table = pd.read_excel(f'{cn.gain_spreadsheet}', sheet_name = "natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') @@ -115,27 +117,27 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u # With processes=30, peak usage was about 350 GB using WHRC AGB. # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. 
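The hard-coded process counts in these scripts encode empirically measured memory peaks. One way to derive a count at run time from the same idea, assuming a measured per-process peak; psutil is already in requirements.txt:

```python
import multiprocessing
import psutil

# Illustrative per-process peak for one stage; the constants in the
# scripts were measured empirically on the 96-processor machine
PEAK_GB_PER_PROCESS = 12

available_gb = psutil.virtual_memory().available / 1024 ** 3
processes = max(1, min(multiprocessing.cpu_count(),
                       int(available_gb // PEAK_GB_PER_PROCESS)))
print(f'Using {processes} processes')
```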
if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 32 # 32 processors = 610 GB peak else: processes = 42 # 30 processors=460 GB peak; 36 = 550 GB peak; 40 = XXX GB peak else: processes = 2 - uu.print_log('Natural forest age category max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, - pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Natural forest age category max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: # - # forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_upload) + # forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern) # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -147,7 +149,7 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u parser = argparse.ArgumentParser( description='Create tiles of the forest age category (<20 years, >20 years secondary, primary)') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -155,21 +157,23 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_forest_age_category_IPCC(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) - + mp_forest_age_category_IPCC(tile_id_list) diff --git a/removals/mp_gain_year_count_all_forest_types.py b/removals/mp_gain_year_count_all_forest_types.py index 6638be58..b6e79ca9 100644 --- a/removals/mp_gain_year_count_all_forest_types.py +++ b/removals/mp_gain_year_count_all_forest_types.py @@ -1,4 +1,4 @@ -''' +""" Creates tiles of the number of years in which carbon removals occur during the model duration (2001 to 2020 currently). 
It is based on the annual Hansen loss data and the 2000-2012 Hansen gain data. First it separately calculates rasters of gain years for model pixels that had loss only, @@ -8,30 +8,36 @@ Then it combines those four rasters into a single gain year raster for each tile using rasterio because summing the arrays using rasterio is faster and uses less memory than combining them with gdalmerge. If different input rasters for loss (e.g., 2001-2017) and gain (e.g., 2000-2018) are used, the year count constants in constants_and_names.py must be changed. -''' +""" -import multiprocessing import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys + import gain_year_count_all_forest_types sys.path.append('../') import constants_and_names as cn import universal_util as uu -def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_gain_year_count_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 5 sets of tiles that show the estimated years of carbon accumulation. + The only one used later in the model is the combined one. The other four are for QC. + Units: years. + """ os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # No point in making gain year count tiles for tiles that don't have annual removals - tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be # changed for a sensitivity analysis. 
This does not need to change based on what run is being done; @@ -40,38 +46,38 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No cn.gain_dir: [cn.pattern_gain], cn.model_extent_dir: [cn.pattern_model_extent] } - + # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] - - + + output_dir_list = [cn.gain_year_count_dir] output_pattern_list = [cn.pattern_gain_year_count] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] @@ -81,56 +87,64 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No processes = 90 # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak else: processes = int(cn.count/2) - uu.print_log('Gain year count loss only pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) + uu.print_log(f'Gain year count loss only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only), + tile_id_list) + pool.close() + pool.join() if cn.count == 96: processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak else: processes = int(cn.count/2) - uu.print_log('Gain year count gain only pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'maxgain': - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - if sensit_type == 'legal_Amazon_loss': - uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.") - else: - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) + uu.print_log(f'Gain year count gain only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + # Creates gain year count tiles using only pixels that had only gain + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain), + tile_id_list) + elif cn.SENSIT_TYPE == 'legal_Amazon_loss': + uu.print_log('Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.') + else: + # Creates gain year count tiles using only pixels that had only gain + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard), + tile_id_list) + pool.close() + pool.join() # Creates gain year count tiles using only pixels that had neither loss nor gain if cn.count == 96: processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak else: processes = int(cn.count/2) - uu.print_log('Gain year count no change pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'legal_Amazon_loss': - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - else: - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) + uu.print_log(f'Gain year count no change pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss), + tile_id_list) + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard), + tile_id_list) + pool.close() + pool.join() if cn.count == 96: processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak else: processes = int(cn.count/2) - uu.print_log('Gain year count loss & gain pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'maxgain': - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - else: - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) + uu.print_log(f'Gain year count loss & gain pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + # Creates gain year count tiles using only pixels that had both loss and gain + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain), + tile_id_list) + else: + # Creates gain year count tiles using only pixels that had both loss and gain + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard), + tile_id_list) + pool.close() + pool.join() # Combines the four above gain year count tiles for each Hansen tile into a single output tile if cn.count == 96: @@ -139,47 +153,47 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No processes = 1 else: processes = int(cn.count/4) - uu.print_log('Gain year count gain merge all combos max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, - pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Gain year count gain merge all combos max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern), + tile_id_list) + pool.close() +
pool.join() # # For single processor use # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload) + # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id) # # for tile_id in tile_id_list: - # if sensit_type == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload) + # if cn.SENSIT_TYPE == 'maxgain': + # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id) # else: - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload) + # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id) # # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload) + # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id) # # for tile_id in tile_id_list: - # if sensit_type == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload) + # if cn.SENSIT_TYPE == 'maxgain': + # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id) # else: - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload) + # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id) # # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload) + # gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: print("in upload area") # Intermediate output tiles for checking outputs - uu.upload_final_set(output_dir_list[0], "growth_years_loss_only") - uu.upload_final_set(output_dir_list[0], "growth_years_gain_only") - uu.upload_final_set(output_dir_list[0], "growth_years_no_change") - uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain") + uu.upload_final_set(output_dir_list[0], "gain_year_count_loss_only") + uu.upload_final_set(output_dir_list[0], "gain_year_count_gain_only") + uu.upload_final_set(output_dir_list[0], "gain_year_count_no_change") + uu.upload_final_set(output_dir_list[0], "gain_year_count_loss_and_gain") # This is the final output used later in the model uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -192,7 +206,7 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No parser = argparse.ArgumentParser( description='Create tiles of number of years in which removals occurred during the model period') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -200,20 +214,23 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_gain_year_count_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_gain_year_count_all_forest_types(tile_id_list) diff --git a/removals/mp_gross_removals_all_forest_types.py b/removals/mp_gross_removals_all_forest_types.py index ceb89545..c64c33c2 100644 --- a/removals/mp_gross_removals_all_forest_types.py +++ b/removals/mp_gross_removals_all_forest_types.py @@ -1,39 +1,44 @@ -''' +""" This script calculates the cumulative above and belowground carbon dioxide removals for all forest types for the duration of the model. It multiplies the annual aboveground and belowground carbon removal factors by the number of years of removals and the C to CO2 conversion. It then sums the aboveground and belowground gross removals to get gross removals for all forest types in both pools. That is the final gross removals for the entire model. Note that gross removals from this script are reported as positive values. -''' +""" -import multiprocessing import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys + sys.path.append('../') import constants_and_names as cn import universal_util as uu sys.path.append(os.path.join(cn.docker_app,'removals')) import gross_removals_all_forest_types -def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = True): +def mp_gross_removals_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 3 sets of tiles: gross aboveground removals, belowground removals, aboveground+belowground removals + Units: Mg CO2/ha over entire model period.
+ """ os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) - gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, sensit_type=sensit_type) - annual_removals_tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type) + # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) + gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, cn.SENSIT_TYPE) + annual_removals_tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, cn.SENSIT_TYPE) tile_id_list = list(set(gain_year_count_tile_id_list).intersection(annual_removals_tile_id_list)) - uu.print_log("Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:") + uu.print_log('Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:') uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. @@ -51,67 +56,66 @@ def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = Non # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Calculates gross removals if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 18 else: processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak else: processes = 2 - uu.print_log('Gross removals max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, output_pattern_list=output_pattern_list, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Gross removals max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() # # For single processor use # for tile_id in tile_id_list: - # gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload) + # gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list) # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( - output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: processes = 55 # 55 processors = 670 GB peak - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -121,7 +125,7 @@ def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = Non parser = argparse.ArgumentParser( description='Create tiles of gross removals over the model period') parser.add_argument('--model-type', '-t', 
required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, @@ -129,20 +133,23 @@ def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = Non parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_gross_removals_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_gross_removals_all_forest_types(tile_id_list) diff --git a/requirements.txt b/requirements.txt index 2eb23873..c7149a3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,16 @@ -cftime -awscli -boto3 -botocore -netCDF4 +awscli==1.25.58 +boto3==1.24.57 +botocore==1.27.57 +cftime==1.6.1 +netCDF4==1.6.0 numpy>=1.18.5 -openpyxl -pandas -psycopg2 -rasterio -scipy -simpledbf -virtualenv -psutil +openpyxl==3.0.10 +pandas==1.4.3 +psutil==5.9.1 +psycopg2==2.9.3 +pylint==2.14.5 +pytest==7.1.2 +rasterio==1.3.2 +scipy==1.9.0 +simpledbf==0.2.6 +virtualenv==20.16.3 diff --git a/run_full_model.py b/run_full_model.py index 41e46c4d..5cea0cb4 100644 --- a/run_full_model.py +++ b/run_full_model.py @@ -1,10 +1,16 @@ -''' -Clone repositoroy: +""" +Clone repository: git clone https://github.com/wri/carbon-budget Create spot machine using spotutil: spotutil new r5d.24xlarge dgibbs_wri +Build Docker container: +docker build . 
-t gfw/carbon-budget + + Enter Docker container: + docker run --rm -it -e AWS_SECRET_ACCESS_KEY=[] -e AWS_ACCESS_KEY_ID=[] gfw/carbon-budget + Compile C++ emissions module (for standard model and sensitivity analyses that use the standard emissions model) c++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal @@ -19,13 +25,13 @@ FULL STANDARD MODEL RUN: Run all tiles in standard model; save intermediate outputs; do upload outputs to s3; run all model stages; starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil python run_full_model.py -si -t std -s all -r -l all -ce loss -p biomass_soil -tcd 30 -ln "Running all tiles" -''' +""" import argparse -import os -import glob import datetime -import logging +import glob +import os + import constants_and_names as cn import universal_util as uu from data_prep.mp_model_extent import mp_model_extent @@ -43,6 +49,10 @@ from analyses.mp_create_supplementary_outputs import mp_create_supplementary_outputs def main (): + """ + Runs the entire forest GHG flux model or a subset of stages + :return: Sets of output tiles for the selected stages + """ os.chdir(cn.docker_base_dir) @@ -55,9 +65,9 @@ def main (): # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run parser = argparse.ArgumentParser(description='Run the full carbon flux model') - parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help)) + parser.add_argument('--model-type', '-t', required=True, help=f'{cn.model_type_arg_help}') parser.add_argument('--stages', '-s', required=True, - help='Stages for running the flux model. Options are {}'.format(model_stages)) + help=f'Stages for running the flux model. Options are {model_stages}') parser.add_argument('--run-through', '-r', action='store_true', help='If activated, run named stage and all following stages. If not activated, run the selected stage only.') parser.add_argument('--run-date', '-d', required=False, @@ -84,101 +94,99 @@ def main (): help='Note to include in log header about model run.') args = parser.parse_args() - sensit_type = args.model_type - stage_input = args.stages - run_through = args.run_through - run_date = args.run_date - tile_id_list = args.tile_id_list - carbon_pool_extent = args.carbon_pool_extent - emitted_pools = args.emitted_pools_to_use - thresh = args.tcd_threshold - if thresh is not None: - thresh = int(thresh) - std_net_flux = args.std_net_flux_aggreg - include_mangroves = args.mangroves - include_us = args.us_rates - no_upload = args.no_upload - save_intermediates = args.save_intermediates - log_note = args.log_note - + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.STAGE_INPUT = args.stages + cn.RUN_THROUGH = args.run_through + cn.RUN_DATE = args.run_date + cn.CARBON_POOL_EXTENT = args.carbon_pool_extent + cn.EMITTED_POOLS = args.emitted_pools_to_use + cn.THRESH = args.tcd_threshold + cn.STD_NET_FLUX = args.std_net_flux_aggreg + cn.INCLUDE_MANGROVES = args.mangroves + cn.INCLUDE_US = args.us_rates + cn.NO_UPLOAD = args.no_upload + cn.SAVE_INTERMEDIATES = args.save_intermediates + cn.LOG_NOTE = args.log_note - # Start time for script - script_start = datetime.datetime.now() + tile_id_list = args.tile_id_list # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): uu.print_log("s3 credentials not found.
Uploading to s3 disabled but downloading enabled.") - no_upload = True - + cn.NO_UPLOAD = True # Forces intermediate files to not be deleted if files can't be uploaded to s3. # Rationale is that if uploads to s3 are not occurring, intermediate files can't be downloaded during the model # run and therefore must exist locally. - if no_upload == True: - save_intermediates = True + if cn.NO_UPLOAD: + cn.SAVE_INTERMEDIATES = True + if cn.THRESH is not None: + cn.THRESH = int(cn.THRESH) # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates, - stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent, - emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux, - include_mangroves=include_mangroves, include_us=include_us, log_note=log_note) + uu.initiate_log(tile_id_list) + # Checks whether the sensitivity analysis and tile_id_list arguments are valid + uu.check_sensit_type(cn.SENSIT_TYPE) + + # Start time for script + script_start = datetime.datetime.now() # Checks the validity of the model stage arguments. If either one is invalid, the script ends. - if (stage_input not in model_stages): - uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', model_stages) + if cn.STAGE_INPUT not in model_stages: + uu.exception_log(f'Invalid stage selection. Please provide a stage from {model_stages}') else: pass # Generates the list of stages to run - actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, sensit_type, - include_mangroves = include_mangroves, include_us=include_us) - uu.print_log("Analysis stages to run:", actual_stages) + actual_stages = uu.analysis_stages(model_stages, cn.STAGE_INPUT, cn.RUN_THROUGH, cn.SENSIT_TYPE, + include_mangroves = cn.INCLUDE_MANGROVES, include_us=cn.INCLUDE_US) + uu.print_log(f'Analysis stages to run: {actual_stages}') # Reports how much storage is being used with files uu.check_storage() # Checks whether the sensitivity analysis argument is valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation. # Does this up front so the user knows before the run begins that information is missing. - if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): - uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") + if ('carbon_pools' in actual_stages) & (cn.CARBON_POOL_EXTENT not in ['loss', '2000', 'loss,2000', '2000,loss']): + uu.exception_log('Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.') # Checks if the correct c++ script has been compiled for the pool option selected. # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary. if 'gross_emissions' in actual_stages: - if emitted_pools == 'biomass_soil': + if cn.EMITTED_POOLS == 'biomass_soil': # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. 
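The checks below gate each emissions stage on the presence of a compiled executable, so the user is warned before any tiles are processed. A condensed sketch of that gate; the path is a hypothetical stand-in for cn.c_emis_compile_dst:

import os

compile_dst = '/usr/local/app/emissions/cpp_util'   # hypothetical stand-in for cn.c_emis_compile_dst
exe_path = os.path.join(compile_dst, 'calc_gross_emissions_generic.exe')

if os.path.exists(exe_path):
    print('C++ for generic emissions already compiled.')
else:
    # The real code calls uu.exception_log(); exiting early has the same effect here
    raise SystemExit('Must compile generic emissions C++...')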
- if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: - if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)): - uu.print_log("C++ for {} already compiled.".format(sensit_type)) + if cn.SENSIT_TYPE in ['no_shifting_ag', 'convert_to_grassland']: + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_{cn.SENSIT_TYPE}.exe'): + uu.print_log(f'C++ for {cn.SENSIT_TYPE} already compiled.') else: - uu.exception_log(no_upload, 'Must compile standard {} model C++...'.format(sensit_type)) + uu.exception_log(f'Must compile standard {cn.SENSIT_TYPE} model C++...') else: - if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_generic.exe'): + uu.print_log('C++ for generic emissions already compiled.') else: - uu.exception_log(no_upload, 'Must compile generic emissions C++...') + uu.exception_log('Must compile generic emissions C++...') - elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): - if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") + elif (cn.EMITTED_POOLS == 'soil_only') & (cn.SENSIT_TYPE == 'std'): + if os.path.exists(f'{cn.c_emis_compile_dst}/calc_gross_emissions_soil_only.exe'): + uu.print_log('C++ for generic emissions already compiled.') else: - uu.exception_log(no_upload, 'Must compile soil_only C++...') + uu.exception_log('Must compile soil_only C++...') else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid for gross emissions') + uu.exception_log('Pool and/or sensitivity analysis option not valid for gross emissions') # Checks whether the canopy cover argument is valid up front. if 'aggregate' in actual_stages: - if thresh < 0 or thresh > 99: - uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') + if cn.THRESH < 0 or cn.THRESH > 99: + uu.exception_log('Invalid tcd. Please provide an integer between 0 and 99.') else: pass @@ -186,7 +194,7 @@ def main (): if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))), "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Otherwise, check that the tile list argument is valid. 
"all" is the way to specify that all tiles should be processed else: tile_id_list = uu.tile_id_list_check(tile_id_list) @@ -219,18 +227,18 @@ def main (): # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: - if 'loss' in carbon_pool_extent: + if 'loss' in cn.CARBON_POOL_EXTENT: output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir] - if '2000' in carbon_pool_extent: + if '2000' in cn.CARBON_POOL_EXTENT: output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir] # Adds the biomass_soil output directories or the soil_only output directories depending on the model run - if emitted_pools == 'biomass_soil': + if cn.EMITTED_POOLS == 'biomass_soil': output_dir_list = output_dir_list + [cn.gross_emis_commod_biomass_soil_dir, cn.gross_emis_shifting_ag_biomass_soil_dir, cn.gross_emis_forestry_biomass_soil_dir, @@ -273,277 +281,276 @@ def main (): # removal function if 'annual_removals_mangrove' in actual_stages: - uu.print_log(":::::Creating tiles of annual removals for mangrove") + uu.print_log(':::::Creating tiles of annual removals for mangrove') start = datetime.datetime.now() - mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = run_date) + mp_annual_gain_rate_mangrove(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_mangrove: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGC+BGC removals rate and AGC stdev for US-specific removals using the standard model # removal function if 'annual_removals_us' in actual_stages: - uu.print_log(":::::Creating tiles of annual removals for US") + uu.print_log(':::::Creating tiles of annual removals for US') start = datetime.datetime.now() - mp_US_removal_rates(sensit_type, tile_id_list, run_date = run_date) + mp_US_removal_rates(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_us: {elapsed_time}', "\n", "\n") # Creates model extent tiles if 'model_extent' in actual_stages: - uu.print_log(":::::Creating tiles of model extent") + uu.print_log(':::::Creating tiles of model extent') start = datetime.datetime.now() - mp_model_extent(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_model_extent(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for model_extent: {elapsed_time}', "\n", "\n") # Creates age category tiles for natural forests if 'forest_age_category_IPCC' in actual_stages: - uu.print_log(":::::Creating tiles of forest age categories for IPCC removal rates") + uu.print_log(':::::Creating tiles of forest age categories for IPCC removal rates') start = datetime.datetime.now() - mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_forest_age_category_IPCC(tile_id_list) end = 
datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for forest_age_category_IPCC: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGB and BGB removals rates using IPCC Table 4.9 defaults if 'annual_removals_IPCC' in actual_stages: - uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults") + uu.print_log(':::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults') start = datetime.datetime.now() - mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_IPCC_defaults(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_IPCC: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types if 'annual_removals_all_forest_types' in actual_stages: - uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates for all forest types") + uu.print_log(':::::Creating tiles of annual aboveground and belowground removal rates for all forest types') start = datetime.datetime.now() - mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types: {elapsed_time}', "\n", "\n") # Creates tiles of the number of years of removals for all model pixels (across all forest types) if 'gain_year_count' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for gain year count creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for gain year count creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) - # 
tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_mangrove_biomass_2000}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_natrl_forest_US}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_natrl_forest_young}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_age_cat_IPCC}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_IPCC_defaults}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_IPCC_defaults}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_ifl_primary}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_planted_forest_type_unmasked}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_plant_pre_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_natrl_forest_young}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_all_types}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating tiles of gain year count for all removal pixels") + uu.print_log(':::::Creating tiles of gain year count for all removal pixels') start = datetime.datetime.now() - mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = 
run_date, no_upload=no_upload) + mp_gain_year_count_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for gain_year_count: {elapsed_time}', "\n", "\n") # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground) if 'gross_removals_all_forest_types' in actual_stages: - uu.print_log(":::::Creating gross removals for all forest types combined (above + belowground) tiles") + uu.print_log(':::::Creating gross removals for all forest types combined (above + belowground) tiles') start = datetime.datetime.now() - mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_gross_removals_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for gross_removals_all_forest_types: {elapsed_time}', "\n", "\n") # Creates carbon emitted_pools in loss year if 'carbon_pools' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for carbon pool creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for carbon pool creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_model_extent))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_model_extent}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_age_cat_IPCC}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGC_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_all_types}*tif')) tiles_to_delete.extend(glob.glob('*growth_years*tif')) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gain_year_count}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_BGCO2_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_BGCO2_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_ifl_primary}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_planted_forest_type_unmasked}*tif')) + 
uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating carbon pool tiles") + uu.print_log(':::::Creating carbon pool tiles') start = datetime.datetime.now() - mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates) + mp_create_carbon_pools(tile_id_list, cn.CARBON_POOL_EXTENT) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for create_carbon_pools: {elapsed_time}', "\n", "\n") # Creates gross emissions tiles by driver, gas, and all emissions combined if 'gross_emissions' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for gross emissions creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for gross emissions creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_AGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_BGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_deadwood_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_litter_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_total_C_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_elevation}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_precip}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cont_eco_processed}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_mangrove_biomass_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + uu.print_log(f' Deleting 
{len(tiles_to_delete)} tiles...') uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating gross emissions tiles") + uu.print_log(':::::Creating gross emissions tiles') start = datetime.datetime.now() - mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date, no_upload=no_upload) + mp_calculate_gross_emissions(tile_id_list, cn.EMITTED_POOLS) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for gross_emissions: {elapsed_time}', "\n", "\n") # Creates net flux tiles (gross emissions - gross removals) if 'net_flux' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for net flux creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for net flux creation by deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_commod_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_shifting_ag_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_forestry_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_wildfire_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_urban_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_no_driver_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_nodes_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_peat_mask))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_climate_zone))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_burn_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil}*tif')) + 
tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_commod_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_shifting_ag_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_forestry_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_wildfire_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_urban_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_no_driver_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_nodes_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_AGC_emis_year}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_BGC_emis_year}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_deadwood_emis_year_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_litter_emis_year_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_soil_C_emis_year_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_total_C_emis_year}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_peat_mask}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_ifl_primary}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_planted_forest_type_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_drivers}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_climate_zone}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_bor_tem_trop_processed}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_burn_year}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_plant_pre_2000}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating net flux tiles") + uu.print_log(':::::Creating net flux tiles') start = datetime.datetime.now() - mp_net_flux(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_net_flux(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for net_flux: {elapsed_time}', "\n", "\n") # Aggregates gross emissions, gross removals, and net flux to coarser resolution. @@ -552,86 +559,86 @@ def main (): # aux.xml files need to be deleted because otherwise they'll be included in the aggregation iteration. 
# They are created by using check_and_delete_if_empty_light() - uu.print_log(":::::Deleting any aux.xml files") + uu.print_log(':::::Deleting any aux.xml files') tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*aux.xml')) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted {0} aux.xml files: {1}".format(len(tiles_to_delete), tiles_to_delete), "\n") + uu.print_log(f':::::Deleted {len(tiles_to_delete)} aux.xml files: {tiles_to_delete}', "\n") - uu.print_log(":::::Creating 4x4 km aggregate maps") + uu.print_log(':::::Creating 4x4 km aggregate maps') start = datetime.datetime.now() - mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux, - run_date=run_date, no_upload=no_upload) + mp_aggregate_results_to_4_km(tile_id_list, cn.THRESH, std_net_flux=cn.STD_NET_FLUX) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for aggregate: {elapsed_time}', "\n", "\n") # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters if 'create_supplementary_outputs' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Deleting rewindowed tiles") + uu.print_log(':::::Deleting rewindowed tiles') tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*rewindow*tif')) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating supplementary versions of main model outputs (forest extent, per pixel)") + uu.print_log(':::::Creating supplementary versions of main model outputs (forest extent, per pixel)') start = datetime.datetime.now() - mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_create_supplementary_outputs(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for supplementary output raster creation:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for supplementary output raster creation: {elapsed_time}', "\n", "\n") # If no_upload flag is activated, tiles on s3 aren't counted - if not no_upload: + if not cn.NO_UPLOAD: - uu.print_log(":::::Counting tiles output to each folder") + uu.print_log(':::::Counting tiles output to each folder') # Modifies output directory names to make them match those used during the model run. # The tiles in each of these directories and counted and logged. # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Modifying output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Modifying output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
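The legal Amazon hunks below rename outputs from growth_years_* to gain_year_count_* but leave the gdal_calc.py invocation itself unchanged. A sketch of how such a call is assembled and run, with hypothetical input tile names; in the --calc expression the comparisons act as 0/1 masks, so the result is A-1 only where there was loss, no gain, and year-2000 forest:

import subprocess

tile_id = '00N_000E'
loss = f'{tile_id}_loss.tif'       # hypothetical annual loss tile
gain = f'{tile_id}_gain.tif'       # hypothetical 2000-2012 gain tile
extent = f'{tile_id}_extent.tif'   # hypothetical forest extent 2000 tile

loss_calc = '--calc=(A>0)*(B==0)*(C==1)*(A-1)'
loss_outfilearg = f'--outfile={tile_id}_gain_year_count_loss_only.tif'
cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_calc, loss_outfilearg,
       '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet']
subprocess.run(cmd, check=True)    # the real code uses Popen so stdout can be piped to the log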
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) for output in output_dir_list: tile_count = uu.count_tiles_s3(output) - uu.print_log("Total tiles in", output, ": ", tile_count) + uu.print_log(f'Total tiles in {output}: {tile_count}') script_end = datetime.datetime.now() script_elapsed_time = script_end - script_start - uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n") + uu.print_log(f':::::Processing time for entire run: {script_elapsed_time}', "\n") # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: uu.upload_log() + if __name__ == '__main__': main() diff --git a/sensitivity_analysis/US_removal_rates.py b/sensitivity_analysis/US_removal_rates.py index 2b004476..0fc3136d 100644 --- a/sensitivity_analysis/US_removal_rates.py +++ b/sensitivity_analysis/US_removal_rates.py @@ -53,7 +53,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g start = datetime.datetime.now() # Names of the input tiles - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{cn.pattern_gain}_{tile_id}.tif' annual_gain_standard = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_AGB_IPCC_defaults) # Used as the template extent/default for the US US_age_cat = '{0}_{1}.tif'.format(tile_id, cn.pattern_US_forest_age_cat_processed) US_forest_group = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_forest_group_processed) diff --git a/sensitivity_analysis/legal_AMZ_loss.py b/sensitivity_analysis/legal_AMZ_loss.py index 2a3d6d87..26f093b7 100644 --- a/sensitivity_analysis/legal_AMZ_loss.py +++ b/sensitivity_analysis/legal_AMZ_loss.py @@ -14,7 +14,7 @@ def legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern): start = datetime.datetime.now() loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{cn.pattern_gain}_{tile_id}.tif' extent = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_WHRC_biomass_2000_non_mang_non_planted) plantations = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_planted_forest_type_unmasked) @@ -98,7 +98,7 @@ def tile_names(tile_id, sensit_type): # Names of the input files loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{cn.pattern_gain}_{tile_id}.tif' extent = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_WHRC_biomass_2000_non_mang_non_planted) @@ -118,7 +118,7 @@ def legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): # Pixels with loss only, in PRODES forest 2000 loss_calc = '--calc=(A>0)*(B==0)*(C==1)*(A-1)' - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) + loss_outfilename = '{}_gain_year_count_loss_only.tif'.format(tile_id) loss_outfilearg = '--outfile={}'.format(loss_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -128,7 +128,7 @@ def 
legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_only') # Creates gain year count tiles for pixels that had no loss. It doesn't matter if there was gain in these pixels because @@ -153,7 +153,7 @@ def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): # Pixels with loss but in areas with PRODES forest 2000 and biomass >0 (same as standard model) no_change_calc = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) + no_change_outfilename = '{}_gain_year_count_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass, no_change_calc, no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -163,7 +163,7 @@ def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change') # Creates gain year count tiles for pixels that had both loss and gain @@ -179,7 +179,7 @@ def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_t # Pixels with both loss and gain, and in PRODES forest 2000 loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -189,7 +189,7 @@ def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_t uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') # Merges the four gain year count tiles above to create a single gain year count tile @@ -201,9 +201,9 @@ def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern): start = datetime.datetime.now() # The four rasters from above that are to be merged - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) + loss_outfilename = '{}_gain_year_count_loss_only.tif'.format(tile_id) + no_change_outfilename = '{}_gain_year_count_no_change.tif'.format(tile_id) + loss_and_gain_outfilename = '{}_gain_year_count_loss_and_gain.tif'.format(tile_id) # All four components are merged together to the final output raster age_outfile = '{}_{}.tif'.format(tile_id, output_pattern) diff --git a/sensitivity_analysis/mp_Mekong_loss.py b/sensitivity_analysis/mp_Mekong_loss.py index c282ac82..52c86219 100644 --- a/sensitivity_analysis/mp_Mekong_loss.py +++ 
b/sensitivity_analysis/mp_Mekong_loss.py @@ -26,7 +26,7 @@ def main (): tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir) # tile_id_list = ['50N_130W'] # test tiles uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads the Mekong loss folder. Each year of loss has its own raster @@ -60,7 +60,8 @@ def main (): source_raster = loss_composite out_pattern = cn.pattern_Mekong_loss_processed dt = 'Byte' - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # This is necessary for changing NoData values to 0s (so they are recognized as 0s) pool.map(Mekong_loss.recode_tiles, tile_id_list) diff --git a/sensitivity_analysis/mp_Saatchi_biomass_prep.py b/sensitivity_analysis/mp_Saatchi_biomass_prep.py index fe7b49ae..2bad172a 100644 --- a/sensitivity_analysis/mp_Saatchi_biomass_prep.py +++ b/sensitivity_analysis/mp_Saatchi_biomass_prep.py @@ -27,7 +27,7 @@ def main (): # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles # tile_id_list = ['00N_110E'] # test tile uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB) sensit_type = 'biomass_swap' @@ -40,7 +40,8 @@ def main (): out_pattern = cn.pattern_JPL_unmasked_processed dt = 'Float32' pool = multiprocessing.Pool(cn.count-5) # count-5 peaks at 320GB of memory - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # Checks if each tile has data in it. Only tiles with data are uploaded. 
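The pool.map(partial(...)) idiom in the hunks above binds every keyword argument of uu.mp_warp_to_Hansen so that only tile_id varies across worker processes. A self-contained sketch of the same idiom, with a stand-in worker function (the raster name, pattern, and pool size are illustrative):

    import multiprocessing
    from functools import partial

    def warp_tile(tile_id, source_raster, out_pattern, dt):
        # Stand-in for uu.mp_warp_to_Hansen: processes one tile per call
        print(f'warping {source_raster} to {tile_id}_{out_pattern}.tif as {dt}')

    if __name__ == '__main__':
        tile_id_list = ['00N_000E', '00N_110E', '10N_010E']
        with multiprocessing.Pool(3) as pool:
            # partial() fixes the keyword arguments; pool.map supplies tile_id
            pool.map(partial(warp_tile, source_raster='Saatchi_JPL_AGB.vrt',
                             out_pattern='JPL_unmasked_processed', dt='Float32'),
                     tile_id_list)
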
upload_dir = cn.JPL_processed_dir diff --git a/sensitivity_analysis/mp_US_removal_rates.py b/sensitivity_analysis/mp_US_removal_rates.py index 6a547a0c..afa3a2fb 100644 --- a/sensitivity_analysis/mp_US_removal_rates.py +++ b/sensitivity_analysis/mp_US_removal_rates.py @@ -115,8 +115,8 @@ def main (): out_pattern = cn.pattern_US_forest_age_cat_processed dt = 'Int16' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), US_tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + US_tile_id_list) uu.upload_final_set(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed) @@ -138,8 +138,8 @@ def main (): out_pattern = cn.pattern_FIA_forest_group_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), US_tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + US_tile_id_list) uu.upload_final_set(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed) diff --git a/sensitivity_analysis/mp_legal_AMZ_loss.py b/sensitivity_analysis/mp_legal_AMZ_loss.py index fc7a43de..394673df 100644 --- a/sensitivity_analysis/mp_legal_AMZ_loss.py +++ b/sensitivity_analysis/mp_legal_AMZ_loss.py @@ -46,11 +46,11 @@ def main (): # Checks the validity of the two arguments. If either one is invalid, the script ends. if (stage_input not in Brazil_stages): - uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages) + uu.exception_log('Invalid stage selection. Please provide a stage from', Brazil_stages) else: pass if (run_through not in ['true', 'false']): - uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.') + uu.exception_log('Invalid run through option. Please enter true or false.') else: pass @@ -78,7 +78,7 @@ def main (): # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles # tile_id_list = ['50N_130W'] # test tiles uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input rasters and lists them uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type) @@ -109,8 +109,8 @@ def main (): out_pattern = cn.pattern_Brazil_forest_extent_2000_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # Checks if each tile has data in it. Only tiles with data are uploaded. 
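The uu.exception_log calls above drop their leading no_upload argument because the helper now consults cn.NO_UPLOAD itself (its updated definition appears in the universal_util.py hunks later in this patch). A condensed sketch of the behavior, written as it would sit inside universal_util.py where logging, cn, and upload_log are already available; the message joining is simplified and the final raise is assumed from the function's description ("terminates the program with an exception in the console"):

    def exception_log(*args):
        # Joins the arguments into one message and logs it with a stack trace
        full_statement = ' '.join(str(arg) for arg in args)
        logging.info(full_statement, stack_info=True)

        # Uploads the log before the exception stops the script, unless uploads are off
        if not cn.NO_UPLOAD:
            upload_log()

        raise Exception(full_statement)
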
upload_dir = master_output_dir_list[0] @@ -126,7 +126,7 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input rasters and lists them cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive'] @@ -163,8 +163,8 @@ def main (): out_pattern = cn.pattern_Brazil_annual_loss_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) uu.print_log(" PRODES composite loss raster warped to Hansen tiles") # Checks if each tile has data in it. Only tiles with data are uploaded. @@ -193,7 +193,7 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list @@ -205,7 +205,7 @@ def main (): # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list) @@ -250,7 +250,7 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list @@ -262,7 +262,7 @@ def main (): # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list) @@ -296,10 +296,10 @@ def main (): # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern) # Intermediate output tiles for checking outputs - uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only") - uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only") - uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change") - uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_loss_only") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_gain_only") + uu.upload_final_set(stage_output_dir_list[3], 
"gain_year_count_no_change") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_loss_and_gain") # Uploads output from this stage uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3]) @@ -322,13 +322,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (annual removals). if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[4:6]) @@ -438,13 +438,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[6:8]) @@ -510,13 +510,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). 
if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[8:10]) @@ -588,7 +588,7 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") for key, values in download_dict.items(): dir = key @@ -597,7 +597,7 @@ def main (): # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[10:16]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[10:16]) @@ -675,7 +675,7 @@ def main (): uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0]) else: - uu.exception_log(no_upload, "Extent argument not valid") + uu.exception_log("Extent argument not valid") uu.print_log("Creating tiles of belowground carbon") # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine @@ -749,7 +749,7 @@ def main (): uu.print_log("Skipping soil for 2000 carbon pool calculation") else: - uu.exception_log(no_upload, "Extent argument not valid") + uu.exception_log("Extent argument not valid") uu.print_log("Creating tiles of total carbon") # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/carbon_pools/__init__.py b/test/carbon_pools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/carbon_pools/test_carbon_pools.py b/test/carbon_pools/test_carbon_pools.py new file mode 100644 index 00000000..a164a3c9 --- /dev/null +++ b/test/carbon_pools/test_carbon_pools.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest as pytest + +from ...carbon_pools.create_carbon_pools import create_deadwood_litter, arid_pools + + +# Use @pytest.mark.skip to skip tests if needed. 
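+# A skipped test would look like this (illustrative sketch only; the test name
+# and reason string are placeholders):
+#
+# @pytest.mark.skip(reason='input rasters not available locally')
+# def test_requires_local_rasters():
+#     ...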
+ +def test_can_call_function(): + result = create_deadwood_litter("", {}, {}, [], "", True) + assert result is None + + +def test_can_call_with_biomass_swap(): + result = create_deadwood_litter("", {}, {}, [], "biomass_swap", True) + assert result is None + + +def test_arid_pools(): + result = arid_pools( + elevation_window=2000, + precip_window=1000, + bor_tem_trop_window=1, + natrl_forest_biomass_window=np.ma.array([1]), + deadwood_2000_output=np.ma.array([1]), + litter_2000_output=np.ma.array([1]) + ) + assert result == (np.ma.array([1.0094]), np.ma.array([1.0148])) + + +def test_arid_pools_with_no_deadwood_or_litter(): + result = arid_pools( + elevation_window=2000, + precip_window=1000, + bor_tem_trop_window=1, + natrl_forest_biomass_window=np.ma.array([1]), + deadwood_2000_output=np.ma.array([0]), + litter_2000_output=np.ma.array([0]) + ) + assert result == (np.ma.array([0.0094]), np.ma.array([0.0148])) + + +def test_arid_pools_no_biomass_means_none_is_added(): + result = arid_pools( + elevation_window=2000, + precip_window=1000, + bor_tem_trop_window=1, + natrl_forest_biomass_window=np.ma.array([0]), + deadwood_2000_output=np.ma.array([1]), + litter_2000_output=np.ma.array([1]) + ) + assert result == (np.ma.array([1]), np.ma.array([1])) + + +def test_arid_pools_fraction_of_biomass(): + result = arid_pools( + elevation_window=2000, + precip_window=1000, + bor_tem_trop_window=1, + natrl_forest_biomass_window=np.ma.array([0.5]), + deadwood_2000_output=np.ma.array([1]), + litter_2000_output=np.ma.array([1]) + ) + assert result == (np.ma.array([1.0047]), np.ma.array([1.0074])) \ No newline at end of file diff --git a/universal_util.py b/universal_util.py index 3585dcf0..3dfcfd25 100644 --- a/universal_util.py +++ b/universal_util.py @@ -39,10 +39,7 @@ def upload_log(): # Creates the log with a starting line -def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=None, - save_intermediates=None, stage_input=None, run_through=None, carbon_pool_extent=None, - emitted_pools=None, thresh=None, std_net_flux=None, - include_mangroves=None, include_us=None, log_note=None): +def initiate_log(tile_id_list): # For some reason, logging gets turned off when AWS credentials aren't provided. # This restores logging without AWS credentials. @@ -56,24 +53,29 @@ def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=N datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO) - logging.info("Log notes: {}".format(log_note)) - logging.info("Model version: {}".format(cn.version)) - logging.info("This is the start of the log for this model run. 
Below are the command line arguments for this run.") - logging.info("Sensitivity analysis type: {}".format(sensit_type)) - logging.info("Model stage argument: {}".format(stage_input)) - logging.info("Run model stages after the initial selected stage: {}".format(run_through)) - logging.info("Run date: {}".format(run_date)) - logging.info("Tile ID list: {}".format(tile_id_list)) - logging.info("Carbon emitted_pools to generate (optional): {}".format(carbon_pool_extent)) - logging.info("Emissions emitted_pools (optional): {}".format(emitted_pools)) - logging.info("TCD threshold for aggregated map (optional): {}".format(thresh)) - logging.info("Standard net flux for comparison with sensitivity analysis net flux (optional): {}".format(std_net_flux)) - logging.info("Include mangrove removal scripts in model run (optional): {}".format(include_mangroves)) - logging.info("Include US removal scripts in model run (optional): {}".format(include_us)) - logging.info("Do not upload anything to s3: {}".format(no_upload)) - logging.info("AWS credentials supplied: {}".format(check_aws_creds())) - logging.info("Save intermediate outputs: {}".format(save_intermediates)) - logging.info("AWS ec2 instance type and AMI ID:") + if cn.SENSIT_TYPE == 'std': + sensit_type = 'standard model' + else: + sensit_type = cn.SENSIT_TYPE + + logging.info(f'Log notes: {cn.LOG_NOTE}') + logging.info(f'Model version: {cn.version}') + logging.info(f'This is the start of the log for this model run. Below are the command line arguments for this run.') + logging.info(f'Sensitivity analysis type: {sensit_type}') + logging.info(f'Model stage argument: {cn.STAGE_INPUT}') + logging.info(f'Run model stages after the initial selected stage: {cn.RUN_THROUGH}') + logging.info(f'Run date: {cn.RUN_DATE}') + logging.info(f'Tile ID list: {tile_id_list}') + logging.info(f'Carbon emitted_pools to generate (optional): {cn.CARBON_POOL_EXTENT}') + logging.info(f'Emissions emitted_pools (optional): {cn.EMITTED_POOLS}') + logging.info(f'TCD threshold for aggregated map (optional): {cn.THRESH}') + logging.info(f'Standard net flux for comparison with sensitivity analysis net flux (optional): {cn.STD_NET_FLUX}') + logging.info(f'Include mangrove removal scripts in model run (optional): {cn.INCLUDE_MANGROVES}') + logging.info(f'Include US removal scripts in model run (optional): {cn.INCLUDE_US}') + logging.info(f'Do not upload anything to s3: {cn.NO_UPLOAD}') + logging.info(f'AWS credentials supplied: {check_aws_creds()}') + logging.info(f'Save intermediate outputs: {cn.SAVE_INTERMEDIATES}') + logging.info(f'AWS ec2 instance type and AMI ID:') # https://stackoverflow.com/questions/13735051/how-to-capture-curl-output-to-a-file # https://stackoverflow.com/questions/625644/how-to-get-the-instance-id-from-within-an-ec2-instance @@ -90,27 +92,27 @@ def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=N type_file = open("instance_type.txt", "r") type_lines = type_file.readlines() for line in type_lines: - logging.info(" Instance type: {}".format(line.strip())) + logging.info(f' Instance type: {line.strip()}') ami_file = open("ami_id.txt", "r") ami_lines = ami_file.readlines() for line in ami_lines: - logging.info(" AMI ID: {}".format(line.strip())) + logging.info(f' AMI ID: {line.strip()}') os.remove("ami_id.txt") os.remove("instance_type.txt") except: - logging.info(" Not running on AWS ec2 instance") + logging.info(' Not running on AWS ec2 instance') - logging.info("Available processors: {}".format(cn.count) + "\n") + 
logging.info(f"Available processors: {cn.count}") # Suppresses logging from rasterio and botocore below ERROR level for the entire model logging.getLogger("rasterio").setLevel(logging.ERROR) # https://www.tutorialspoint.com/How-to-disable-logging-from-imported-modules-in-Python logging.getLogger("botocore").setLevel(logging.ERROR) # "Found credentials in environment variables." is logged by botocore: https://github.com/boto/botocore/issues/1841 # If no_upload flag is not activated, log is uploaded - if not no_upload: + if not cn.NO_UPLOAD: upload_log() @@ -138,7 +140,7 @@ def print_log(*args): # Logs fatal errors to the log txt, uploads to s3, and then terminates the program with an exception in the console -def exception_log(no_upload, *args): +def exception_log(*args): # Empty string full_statement = str(object='') @@ -151,7 +153,7 @@ def exception_log(no_upload, *args): logging.info(full_statement, stack_info=True) # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: # Need to upload log before the exception stops the script upload_log() @@ -165,7 +167,7 @@ def exception_log(no_upload, *args): def log_subprocess_output(pipe): # Reads all the output into a string - for full_out in iter(pipe.readline, b''): # b'\n'-separated lines + for full_out in iter(pipe.readline, b''): # b"\n"-separated lines # Separates the string into an array, where each entry is one line of output line_array = full_out.splitlines() @@ -198,7 +200,7 @@ def log_subprocess_output_full(cmd): with pipe: # Reads all the output into a string - for full_out in iter(pipe.readline, b''): # b'\n'-separated lines + for full_out in iter(pipe.readline, b''): # b"\n"-separated lines # Separates the string into an array, where each entry is one line of output line_array = full_out.splitlines() @@ -236,8 +238,7 @@ def check_storage(): used_storage = df_output_lines[5][2] available_storage = df_output_lines[5][3] percent_storage_used = df_output_lines[5][4] - print_log("Storage used:", used_storage, "; Available storage:", available_storage, - "; Percent storage used:", percent_storage_used) + print_log(f'Storage used: {used_storage}; Available storage: {available_storage}; Percent storage used: {percent_storage_used}') # Obtains the absolute number of RAM gigabytes currently in use by the entire system (all processors). @@ -252,8 +253,8 @@ def check_memory(): print_log(f"Memory usage is: {round(used_memory,2)} GB out of {round(total_memory,2)} = {round(percent_memory,1)}% usage") if percent_memory > 99: - print_log("WARNING: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.") # Not sure if this is necessary - exception_log("EXCEPTION: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.") + print_log('WARNING: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.') # Not sure if this is necessary + exception_log('EXCEPTION: MEMORY USAGE DANGEROUSLY HIGH! 
TERMINATING PROGRAM.') # Not currently using because it shows 1 when using with multiprocessing @@ -320,7 +321,7 @@ def tile_list_s3(source, sensit_type='std'): else: new_source = source.replace('standard', sensit_type) - print_log('\n' + "Creating list of tiles in", new_source) + print_log("\n" + f'Creating list of tiles in {new_source}') ## For an s3 folder in a bucket using AWSCLI # Captures the list of the files in the folder @@ -338,8 +339,8 @@ def tile_list_s3(source, sensit_type='std'): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -354,7 +355,7 @@ def tile_list_s3(source, sensit_type='std'): # In case the change of directories to look for sensitivity versions yields an empty folder. # This could be done better by using boto3 to check the potential s3 folders for files upfront but I couldn't figure # out how to do that. - print_log('\n' + "Creating list of tiles in", source) + print_log("\n" + f'Creating list of tiles in {source}') ## For an s3 folder in a bucket using AWSCLI # Captures the list of the files in the folder @@ -372,8 +373,8 @@ def tile_list_s3(source, sensit_type='std'): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -401,8 +402,8 @@ def tile_list_spot_machine(source, pattern): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only files with the specified pattern will be in the tile list if pattern in tile_name: @@ -415,7 +416,7 @@ def tile_list_spot_machine(source, pattern): # Creates a list of all tiles found in either two or three s3 folders and removes duplicates from the list def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): - print_log("Making a combined tile list...") + print_log('Making a combined tile list...') # Changes the directory to list tiles according to the model run. 
# If the model run is the biomass_swap or US_removals sensitivity analyses @@ -464,8 +465,8 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): with open("set1.txt", 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -477,8 +478,8 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): with open("set2.txt", 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -487,11 +488,11 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): file_list_set2.append(tile_id) if len(file_list_set1) > 1: - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set1), set1)) + print_log(f'There are {len(file_list_set1)} tiles in {set1}. Using this tile set.') else: - print_log("There are 0 tiles in {}. Looking for alternative tile set...".format(set1)) + print_log(f'There are 0 tiles in {set1}. Looking for alternative tile set...') set1 = set1.replace(sensit_type, 'standard') - print_log(" Looking for alternative tile set in {}".format(set1)) + print_log(f' Looking for alternative tile set in {set1}') # out = Popen(['aws', 's3', 'ls', set1, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) out = Popen(['aws', 's3', 'ls', set1], stdout=PIPE, stderr=STDOUT) @@ -508,22 +509,22 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): with open("set1.txt", 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: tile_id = get_tile_id(tile_name) file_list_set1.append(tile_id) - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set1), set1)) + print_log(f'There are {len(file_list_set1)} tiles in {set1}. Using this tile set.') if len(file_list_set2) > 1: - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set2), set2)) + print_log(f'There are {len(file_list_set2)} tiles in {set2}. Using this tile set.') else: - print_log("There are 0 tiles in {}. Looking for alternative tile set.".format(set2)) + print_log(f'There are 0 tiles in {set2}. Looking for alternative tile set.') set2 = set2.replace(sensit_type, 'standard') - print_log(" Looking for alternative tile set in {}".format(set2)) + print_log(f' Looking for alternative tile set in {set2}') # out = Popen(['aws', 's3', 'ls', set2, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) out = Popen(['aws', 's3', 'ls', set2], stdout=PIPE, stderr=STDOUT) @@ -539,20 +540,20 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): with open("set2.txt", 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: tile_id = get_tile_id(tile_name) file_list_set2.append(tile_id) - print_log("There are {} tiles in {}. 
Using this tile set.".format(len(file_list_set2), set2)) + print_log(f'There are {len(file_list_set2)} tiles in {set2}. Using this tile set.') # If there's a third folder supplied, iterates through that if set3 != None: - print_log("Third set of tiles input. Adding to first two sets of tiles...") + print_log('Third set of tiles input. Adding to first two sets of tiles...') if sensit_type == 'std': set3 = set3 @@ -573,15 +574,15 @@ def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): with open("set3.txt", 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: tile_id = get_tile_id(tile_name) file_list_set3.append(tile_id) - print_log("There are {} tiles in {}".format(len(file_list_set3), set3)) + print_log(f'There are {len(file_list_set3)} tiles in {set3}') # Combines both tile lists all_tiles = file_list_set1 + file_list_set2 @@ -629,8 +630,8 @@ def count_tiles_s3(source, pattern=None): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, tile_list_name), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # For gain, tcd, pixel area, and loss tiles (and their rewindowed versions), # which have the tile_id after the pattern @@ -656,7 +657,6 @@ def count_tiles_s3(source, pattern=None): return len(file_list) - # Gets the bounding coordinates of a tile def coords(tile_id): NS = tile_id.split("_")[0][-1:] @@ -693,9 +693,9 @@ def s3_flexible_download(source_dir, pattern, dest, sensit_type, tile_id_list): for tile_id in tile_id_list: if pattern in [cn.pattern_gain, cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss, cn.pattern_gain_rewindow, cn.pattern_tcd_rewindow, cn.pattern_pixel_area_rewindow]: # For tiles that do not have the tile_id first - source = '{0}{1}_{2}.tif'.format(source_dir, pattern, tile_id) + source = f'{source_dir}{pattern}_{tile_id}.tif' else: # For every other type of tile - source = '{0}{1}_{2}.tif'.format(source_dir, tile_id, pattern) + source = f'{source_dir}{tile_id}_{pattern}.tif' s3_file_download(source, dest, sensit_type) @@ -719,7 +719,7 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): local_tile_count = len(glob.glob('{}*.tif'.format(pattern))) - print_log("There are", local_tile_count, "tiles on the spot machine with the pattern", pattern) + print_log(f'There are {local_tile_count} tiles on the spot machine with the pattern {pattern}') # Changes the path to download from based on the sensitivity analysis being run and whether that particular input # has a sensitivity analysis path on s3 @@ -728,15 +728,15 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # Creates the appropriate path for getting sensitivity analysis tiles source_sens = source.replace('standard', sensit_type) - print_log("Attempting to change source directory {0} to {1} to reflect sensitivity analysis".format(source, source_sens)) + print_log(f'Attempting to change source directory {source} to {source_sens} to reflect sensitivity analysis') # Counts how many tiles are in the sensitivity analysis source s3 folder s3_count_sens = count_tiles_s3(source_sens) - print_log("There are", 
s3_count_sens, "tiles in sensitivity analysis folder", source_sens, "with the pattern", pattern) + print_log(f'There are {s3_count_sens} tiles in sensitivity analysis folder {source_sens} with the pattern {pattern}') # Counts how many tiles are in the standard model source s3 folder s3_count_std = count_tiles_s3(source) - print_log("There are", s3_count_std, "tiles in standard model folder", source, "with the pattern", pattern) + print_log(f'There are {s3_count_std} tiles in standard model folder {source} with the pattern {pattern}') # Decides which source folder to use the count from: standard model or sensitivity analysis. # If there are sensitivity analysis tiles, that source folder should be used. @@ -750,14 +750,14 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # If there are as many tiles on the spot machine with the relevant pattern as there are on s3, no tiles are downloaded if local_tile_count == s3_count: - print_log("Tiles with pattern", pattern, "are already on spot machine. Not downloading.", '\n') + print_log(f'Tiles with pattern {pattern} are already on spot machine. Not downloading.', "\n") return # If there appears to be a full set of tiles in the sensitivity analysis folder (7 is semi arbitrary), # the sensitivity folder is downloaded if s3_count > 7: - print_log("Source directory used:", source_final) + print_log(f'Source directory used: {source_final}') cmd = ['aws', 's3', 'cp', source_final, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] @@ -765,7 +765,7 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv'] log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # If there are fewer than 7 files in the sensitivity folder (i.e., either folder doesn't exist or it just has # a few test tiles), the standard folder is downloaded. @@ -773,7 +773,7 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # for this date. else: - print_log("Source directory used:", source) + print_log(f'Source directory used: {source}') cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] @@ -781,21 +781,21 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv'] log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # For the standard model, the standard folder is downloaded. else: # Counts how many tiles are in the source s3 folder s3_count = count_tiles_s3(source, pattern=pattern) - print_log("There are", s3_count, "tiles at", source, "with the pattern", pattern) + print_log(f'There are {s3_count} tiles at {source} with the pattern {pattern}') # If there are as many tiles on the spot machine with the relevant pattern as there are on s3, no tiles are downloaded if local_tile_count == s3_count: - print_log("Tiles with pattern", pattern, "are already on spot machine. Not downloading.", '\n') + print_log(f'Tiles with pattern {pattern} are already on spot machine. Not downloading.', "\n") return - print_log("Tiles with pattern", pattern, "are not on spot machine. Downloading...") + print_log(f'Tiles with pattern {pattern} are not on spot machine. 
Downloading...') cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] @@ -804,7 +804,7 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # Downloads individual tiles from s3 @@ -832,13 +832,13 @@ def s3_file_download(source, dest, sensit_type): file_name_sens = file_name[:-4] + '_' + sensit_type + '.tif' # Doesn't download the tile if sensitivity version is already on the spot machine - print_log("Option 1: Checking if {} is already on spot machine...".format(file_name_sens)) + print_log(f'Option 1: Checking if {file_name_sens} is already on spot machine...') if os.path.exists(file_name_sens): - print_log(" Option 1 success:", file_name_sens, "already downloaded", "\n") + print_log(f' Option 1 success: {file_name_sens} already downloaded', "\n") return else: - print_log(" Option 1 failure: {0} is not already on spot machine.".format(file_name_sens)) - print_log("Option 2: Checking for sensitivity analysis tile {0}/{1} on s3...".format(dir_sens[15:], file_name_sens)) + print_log(f' Option 1 failure: {file_name_sens} is not already on spot machine.') + print_log(f'Option 2: Checking for sensitivity analysis tile {dir_sens[15:]}/{file_name_sens} on s3...') # If not already downloaded, first tries to download the sensitivity analysis version # cmd = ['aws', 's3', 'cp', '{0}/{1}'.format(dir_sens, file_name_sens), dest, '--no-sign-request', '--only-show-errors'] @@ -846,22 +846,22 @@ def s3_file_download(source, dest, sensit_type): log_subprocess_output_full(cmd) if os.path.exists(file_name_sens): - print_log(" Option 2 success: Sensitivity analysis tile {0}/{1} found on s3 and downloaded".format(dir_sens, file_name_sens), "\n") + print_log(f' Option 2 success: Sensitivity analysis tile {dir_sens}/{file_name_sens} found on s3 and downloaded', "\n") return else: - print_log(" Option 2 failure: Tile {0}/{1} not found on s3. Looking for standard model source...".format(dir_sens, file_name_sens)) + print_log(f' Option 2 failure: Tile {dir_sens}/{file_name_sens} not found on s3. Looking for standard model source...') # Next option is to use standard version of tile if on spot machine. # This can happen despite it being a sensitivity run because this input file doesn't have a sensitivity version # for this date. - print_log("Option 3: Checking if standard version {} is already on spot machine...".format(file_name)) + print_log(f'Option 3: Checking if standard version {file_name} is already on spot machine...') if os.path.exists(file_name): - print_log(" Option 3 success:", file_name, "already downloaded", "\n") + print_log(f' Option 3 success: {file_name} already downloaded', "\n") return else: - print_log(" Option 3 failure: {} is not already on spot machine. ".format(file_name)) - print_log("Option 4: Looking for standard version of {} to download...".format(file_name)) + print_log(f' Option 3 failure: {file_name} is not already on spot machine. ') + print_log(f'Option 4: Looking for standard version of {file_name} to download...') # If not already downloaded, final option is to try to download the standard version of the tile. # If this doesn't work, the script throws a fatal error because no variant of this tile was found. 
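Condensed, the four-option fallback for a sensitivity run amounts to the control flow below. This is a sketch only: fetch stands in for the aws s3 cp subprocess call, and the name construction follows the code above:

    import os

    def s3_file_download_sketch(source, dest, sensit_type, fetch):
        file_name = os.path.basename(source)
        dir_sens = os.path.dirname(source).replace('standard', sensit_type)
        file_name_sens = file_name[:-4] + '_' + sensit_type + '.tif'

        # Option 1: sensitivity version already on the spot machine
        if os.path.exists(os.path.join(dest, file_name_sens)):
            return
        # Option 2: try the sensitivity version on s3
        fetch(f'{dir_sens}/{file_name_sens}', dest)
        if os.path.exists(os.path.join(dest, file_name_sens)):
            return
        # Option 3: standard version already on the spot machine
        if os.path.exists(os.path.join(dest, file_name)):
            return
        # Option 4: try the standard version on s3; if this also fails,
        # no variant of the tile exists and the run cannot proceed
        fetch(source, dest)
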
@@ -870,20 +870,20 @@ def s3_file_download(source, dest, sensit_type): log_subprocess_output_full(cmd) if os.path.exists(file_name): - print_log(" Option 4 success: Standard tile {} found on s3 and downloaded".format(source), "\n") + print_log(f' Option 4 success: Standard tile {source} found on s3 and downloaded', "\n") return else: - print_log(" Option 4 failure: Tile {0} not found on s3. Tile not found but it seems it should be. Check file paths and names.".format(source), "\n") + print_log(f' Option 4 failure: Tile {source} not found on s3. Tile not found but it seems it should be. Check file paths and names.', "\n") # If not a sensitivity run or a tile type without sensitivity analysis variants, the standard file is downloaded else: - print_log("Option 1: Checking if {} is already on spot machine...".format(file_name)) + print_log(f'Option 1: Checking if {file_name} is already on spot machine...') if os.path.exists(os.path.join(dest, file_name)): - print_log(" Option 1 success:", os.path.join(dest, file_name), "already downloaded", "\n") + print_log(f' Option 1 success: {os.path.join(dest, file_name)} already downloaded', "\n") return else: - print_log(" Option 1 failure: {0} is not already on spot machine.".format(file_name)) - print_log("Option 2: Checking for tile {} on s3...".format(source)) + print_log(f' Option 1 failure: {file_name} is not already on spot machine.') + print_log(f'Option 2: Checking for tile {source} on s3...') # If the tile isn't already downloaded, download is attempted @@ -893,23 +893,23 @@ def s3_file_download(source, dest, sensit_type): cmd = ['aws', 's3', 'cp', source, dest, '--only-show-errors'] log_subprocess_output_full(cmd) if os.path.exists(os.path.join(dest, file_name)): - print_log(" Option 2 success: Tile {} found on s3 and downloaded".format(source), "\n") + print_log(f' Option 2 success: Tile {source} found on s3 and downloaded', "\n") return else: - print_log(" Option 2 failure: Tile {} not found on s3. Tile not found but it seems it should be. Check file paths and names.".format(source), "\n") + print_log(f' Option 2 failure: Tile {source} not found on s3. Tile not found but it seems it should be. Check file paths and names.', "\n") # Uploads all tiles of a pattern to specified location def upload_final_set(upload_dir, pattern): - print_log("Uploading tiles with pattern {0} to {1}".format(pattern, upload_dir)) + print_log(f'Uploading tiles with pattern {pattern} to {upload_dir}') cmd = ['aws', 's3', 'cp', cn.docker_base_dir, upload_dir, '--exclude', '*', '--include', '*{}*tif'.format(pattern), '--recursive', '--no-progress'] try: log_subprocess_output_full(cmd) - print_log(" Upload of tiles with {} pattern complete!".format(pattern)) + print_log(f' Upload of tiles with {pattern} pattern complete!') except: - print_log("Error uploading output tile(s)") + print_log('Error uploading output tile(s)') # Uploads the log as each model output tile set is finished upload_log() @@ -927,7 +927,7 @@ def upload_final(upload_dir, tile_id, pattern): try: log_subprocess_output_full(cmd) except: - print_log("Error uploading output tile") + print_log('Error uploading output tile') # This version of checking for data is bad because it can miss tiles that have very little data in them. @@ -935,7 +935,7 @@ def upload_final(upload_dir, tile_id, pattern): # This method creates a tif.aux.xml file that I tried to add a line to delete but couldn't get to work. 
def check_and_delete_if_empty_light(tile_id, output_pattern): - tile_name = '{0}_{1}.tif'.format(tile_id, output_pattern) + tile_name = f'{tile_id}_{output_pattern}.tif' # Source: http://gis.stackexchange.com/questions/90726 # Opens raster and chooses band to find min, max @@ -945,9 +945,9 @@ def check_and_delete_if_empty_light(tile_id, output_pattern): print_log(" Tile stats = Minimum=%.3f, Maximum=%.3f, Mean=%.3f, StdDev=%.3f" % (stats[0], stats[1], stats[2], stats[3])) if stats[0] != 0: - print_log(" Data found in {}. Keeping file...".format(tile_name)) + print_log(f' Data found in {tile_name}. Keeping file...') else: - print_log(" No data found. Deleting {}...".format(tile_name)) + print_log(f' No data found. Deleting {tile_name}...') os.remove(tile_name) # Using this gdal data check method creates a tif.aux.xml file that is unnecessary. @@ -960,49 +960,49 @@ def check_for_data(tile): with rasterio.open(tile) as img: msk = img.read_masks(1).astype(bool) if msk[msk].size == 0: - # print_log("Tile {} is empty".format(tile)) + # print_log(f"Tile {tile} is empty") return True else: - # print_log("Tile {} is not empty".format(tile)) + # print_log(f"Tile {tile} is not empty") return False def check_and_delete_if_empty(tile_id, output_pattern): - tile_name = '{0}_{1}.tif'.format(tile_id, output_pattern) + tile_name = f'{tile_id}_{output_pattern}.tif' # Only checks for data if the tile exists if not os.path.exists(tile_name): - print_log(tile_name, "does not exist. Skipping check of whether there is data.") + print_log(f'{tile_name} does not exist. Skipping check of whether there is data.') return - print_log("Checking if {} contains any data...".format(tile_name)) + print_log(f'Checking if {tile_name} contains any data...') no_data = check_for_data(tile_name) if no_data: - print_log(" No data found in {}. Deleting tile...".format(tile_name)) + print_log(f' No data found in {tile_name}. Deleting tile...') os.remove(tile_name) else: - print_log(" Data found in {}. Keeping tile to copy to s3...".format(tile_name)) + print_log(f' Data found in {tile_name}. Keeping tile to copy to s3...') # Checks if there's data in a tile and, if so, uploads it to s3 def check_and_upload(tile_id, upload_dir, pattern): - print_log("Checking if {} contains any data...".format(tile_id)) - out_tile = '{0}_{1}.tif'.format(tile_id, pattern) + print_log(f'Checking if {tile_id} contains any data...') + out_tile = f'{tile_id}_{pattern}.tif' no_data = check_for_data(out_tile) if no_data: - print_log(" No data found. Not copying {}.".format(tile_id)) + print_log(f' No data found. Not copying {tile_id}.') else: - print_log(" Data found in {}. Copying tile to s3...".format(tile_id)) + print_log(f' Data found in {tile_id}. 
Copying tile to s3...') upload_final(upload_dir, tile_id, pattern) - print_log(" Tile copied to s3") + print_log(' Tile copied to s3') # Prints the number of tiles that have been processed so far @@ -1010,7 +1010,7 @@ def count_completed_tiles(pattern): completed = len(glob.glob1(cn.docker_base_dir, '*{}*'.format(pattern))) - print_log("Number of completed or in-progress tiles:", completed) + print_log(f'Number of completed or in-progress tiles: {completed}') # Returns the NoData value of a raster @@ -1028,25 +1028,25 @@ def get_raster_nodata_value(tile): # Prints information about the tile that was just processed: how long it took and how many tiles have been completed -def end_of_fx_summary(start, tile_id, pattern, no_upload): +def end_of_fx_summary(start, tile_id, pattern): # Checking memory at this point (end of the function) seems to record memory usage when it is at its peak check_memory() end = datetime.datetime.now() elapsed_time = end-start - print_log("Processing time for tile", tile_id, ":", elapsed_time) + print_log(f'Processing time for tile {tile_id}: {elapsed_time}') count_completed_tiles(pattern) # If no_upload flag is not activated, log is uploaded - if not no_upload: + if not cn.NO_UPLOAD: # Uploads the log as each tile is finished upload_log() # Warps raster to Hansen tiles using multiple processors -def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): +def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt): # Start time start = datetime.datetime.now() @@ -1054,7 +1054,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): print_log("Getting extent of", tile_id) xmin, ymin, xmax, ymax = coords(tile_id) - out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern) + out_tile = f'{tile_id}_{out_pattern}.tif' cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-ot', dt, '-overwrite', source_raster, out_tile] @@ -1062,7 +1062,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): with process.stdout: log_subprocess_output(process.stdout) - end_of_fx_summary(start, tile_id, out_pattern, no_upload) + end_of_fx_summary(start, tile_id, out_pattern) def warp_to_Hansen(in_file, out_file, xmin, ymin, xmax, ymax, dt): @@ -1093,27 +1093,27 @@ def rasterize(in_shape, out_tif, xmin, ymin, xmax, ymax, blocksizex, blocksizey, # Creates a tile of all 0s for any tile passed to it. # Uses the Hansen loss tile for information about the tile. # Based on https://gis.stackexchange.com/questions/220753/how-do-i-create-blank-geotiff-with-same-spatial-properties-as-existing-geotiff -def make_blank_tile(tile_id, pattern, folder, sensit_type): +def make_blank_tile(tile_id, pattern, folder): # Creates tile names for standard and sensitivity analyses. # Going into this, the function doesn't know whether there should be a standard tile or a sensitivity tile. # Thus, it has to be prepared for either one. - file_name = '{0}{1}_{2}.tif'.format(folder, tile_id, pattern) - file_name_sens = '{0}{1}_{2}_{3}.tif'.format(folder, tile_id, pattern, sensit_type) + file_name = f'{folder}{tile_id}_{pattern}.tif' + file_name_sens = f'{folder}{tile_id}_{pattern}_{cn.SENSIT_TYPE}.tif' # Checks if the standard file exists. If it does, a blank tile isn't created. if os.path.exists(file_name): - print_log('{} exists. 
Not creating a blank tile.'.format(os.path.join(folder, file_name))) + print_log(f'{os.path.join(folder, file_name)} exists. Not creating a blank tile.') return # Checks if the sensitivity analysis file exists. If it does, a blank tile isn't created. elif os.path.exists(file_name_sens): - print_log('{} exists. Not creating a blank tile.'.format(os.path.join(folder, file_name_sens))) + print_log(f'{os.path.join(folder, file_name_sens)} exists. Not creating a blank tile.') return # If neither a standard tile nor a sensitivity analysis tile exists, a blank tile is created. else: - print_log('{} does not exist. Creating a blank tile.'.format(file_name)) + print_log(f'{file_name} does not exist. Creating a blank tile.') with open(os.path.join(cn.docker_tmp, cn.blank_tile_txt), 'a') as f: f.write('{0}_{1}.tif'.format(tile_id, pattern)) @@ -1123,8 +1123,8 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): # Preferentially uses Hansen loss tile as the template for creating a blank plantation tile # (tile extent, resolution, pixel alignment, compression, etc.). # If the tile is already on the spot machine, it uses the downloaded tile. - if os.path.exists(os.path.join(folder, '{0}_{1}.tif'.format(cn.pattern_loss, tile_id))): - print_log("Hansen loss tile exists for {}. Using that as template for blank tile.".format(tile_id)) + if os.path.exists(os.path.join(folder, f'{cn.pattern_loss}_{tile_id}.tif')): + print_log(f'Hansen loss tile exists for {tile_id}. Using that as template for blank tile.') cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=DEFLATE', '-ot', 'Byte', '-o', '{0}{1}_{2}.tif'.format(folder, tile_id, pattern), '{0}{1}_{2}.tif'.format(folder, cn.pattern_loss, tile_id)] @@ -1135,7 +1135,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): s3_file_download('{0}{1}_{2}.tif'.format(cn.pixel_area_dir, cn.pattern_pixel_area, tile_id), os.path.join(folder, '{0}_{1}.tif'.format(tile_id, 'empty_tile_template')), 'std') - print_log("Downloaded pixel area tile for", tile_id, "to create a blank tile") + print_log(f'Downloaded pixel area tile for {tile_id} to create a blank tile') # Determines what pattern to use (standard or sensitivity) based on the first tile in the list tile_list= tile_list_spot_machine(folder, pattern) @@ -1147,7 +1147,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): '-o', '{0}/{1}_{2}.tif'.format(folder, tile_id, full_pattern), '{0}/{1}_{2}.tif'.format(folder, tile_id, 'empty_tile_template')] check_call(cmd) - print_log("Created raster of all 0s for", file_name) + print_log(f'Created raster of all 0s for {file_name}') # Creates a txt that will have blank dummy tiles listed in it for certain scripts that need those @@ -1161,22 +1161,22 @@ def create_blank_tile_txt(): def list_and_delete_blank_tiles(): blank_tiles_list = open(os.path.join(cn.docker_tmp, cn.blank_tile_txt)).read().splitlines() - print_log("Blank tile list:", blank_tiles_list) + print_log(f'Blank tile list: {blank_tiles_list}') - print_log("Deleting blank tiles...") + print_log('Deleting blank tiles...') for blank_tile in blank_tiles_list: os.remove(blank_tile) - print_log("Deleting blank tile textfile...") + print_log('Deleting blank tile textfile...') os.remove(os.path.join(cn.docker_tmp, cn.blank_tile_txt)) # Reformats the patterns for the 10x10 degree model output tiles for the aggregated output names -def name_aggregated_output(pattern, thresh, sensit_type): +def name_aggregated_output(pattern, thresh): out_pattern = re.sub('ha_', '', pattern) # 
print out_pattern - out_pattern = re.sub('2001_{}'.format(cn.loss_years), 'per_year', out_pattern) + out_pattern = re.sub(f'2001_{cn.loss_years}', 'per_year', out_pattern) # print out_pattern out_pattern = re.sub('gross_emis_year', 'gross_emis_per_year', out_pattern) # print out_pattern @@ -1187,12 +1187,7 @@ def name_aggregated_output(pattern, thresh, sensit_type): date = datetime.datetime.now() date_formatted = date.strftime("%Y%m%d") - # print thresh - # print cn.pattern_aggreg - # print sensit_type - # print date_formatted - - out_name = '{0}_tcd{1}_{2}_{3}_{4}'.format(out_pattern, thresh, cn.pattern_aggreg, sensit_type, date_formatted) + out_name = f'{out_pattern}_tcd{thresh}_{cn.pattern_aggreg}_{cn.SENSIT_TYPE}_{date_formatted}' # print out_name @@ -1204,7 +1199,7 @@ def mask_pre_2000_plantation(pre_2000_plant, tile_to_mask, out_name, tile_id): if os.path.exists(pre_2000_plant): - print_log("Pre-2000 plantation exists for {}. Cutting out pixels in those plantations...".format(tile_id)) + print_log(f'Pre-2000 plantation exists for {tile_id}. Cutting out pixels in those plantations...') # In order to mask out the pre-2000 plantation pixels from the loss raster, the pre-2000 plantations need to # become a vrt. I couldn't get gdal_calc to work while keeping pre-2000 plantations as a raster; it wasn't @@ -1229,12 +1224,12 @@ def mask_pre_2000_plantation(pre_2000_plant, tile_to_mask, out_name, tile_id): return else: - print_log("No pre-2000 plantation exists for {}. Tile done.".format(tile_id)) + print_log(f'No pre-2000 plantation exists for {tile_id}. Tile done.') # print tile_to_mask # print out_name copyfile(tile_to_mask, out_name) - print_log(" Pre-2000 plantations for {} complete".format(tile_id)) + print_log(f' Pre-2000 plantations for {tile_id} complete') # Checks whether the provided sensitivity analysis type is valid @@ -1242,7 +1237,7 @@ def check_sensit_type(sensit_type): # Checks the validity of the two arguments. If either one is invalid, the script ends. if (sensit_type not in cn.sensitivity_list): - exception_log('Invalid model type. Please provide a model type from {}.'.format(cn.sensitivity_list)) + exception_log(f'Invalid model type. 
Please provide a model type from {cn.sensitivity_list}.') else: pass @@ -1250,22 +1245,22 @@ def check_sensit_type(sensit_type): # Changes the name of the input or output directory according to the sensitivity analysis def alter_dirs(sensit_type, raw_dir_list): - print_log("Raw output directory list:", raw_dir_list) + print_log(f'Raw output directory list: {raw_dir_list}') processed_dir_list = [d.replace('standard', sensit_type) for d in raw_dir_list] - print_log("Processed output directory list:", processed_dir_list, "\n") + print_log(f'Processed output directory list: {processed_dir_list}', "\n") return processed_dir_list # Alters the file patterns in a list according to the sensitivity analysis def alter_patterns(sensit_type, raw_pattern_list): - print_log("Raw output pattern list:", raw_pattern_list) + print_log(f'Raw output pattern list: {raw_pattern_list}') processed_pattern_list = [(d + '_' + sensit_type) for d in raw_pattern_list] - print_log("Processed output pattern list:", processed_pattern_list, "\n") + print_log(f'Processed output pattern list: {processed_pattern_list}', "\n") return processed_pattern_list @@ -1275,10 +1270,10 @@ def sensit_tile_rename(sensit_type, tile_id, raw_pattern): # print '{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type) # Uses whatever name of the tile is found on the spot machine - if os.path.exists('{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type)): - processed_name = '{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type) + if os.path.exists(f'{tile_id}_{raw_pattern}_{sensit_type}.tif'): + processed_name = f'{tile_id}_{raw_pattern}_{sensit_type}.tif' else: - processed_name = '{0}_{1}.tif'.format(tile_id, raw_pattern) + processed_name = f'{tile_id}_{raw_pattern}.tif' return processed_name @@ -1323,7 +1318,7 @@ def analysis_stages(stage_list, stage_input, run_through, sensit_type, def tile_id_list_check(tile_id_list): if tile_id_list == 'all': - print_log("All tiles will be run through model. Actual list of tiles will be listed for each model stage as it begins...") + print_log('All tiles will be run through model. 
Actual list of tiles will be listed for each model stage as it begins...') return tile_id_list # Checks tile id list input validity against the pixel area tiles else: @@ -1340,28 +1335,30 @@ def tile_id_list_check(tile_id_list): for tile_id in tile_id_list: if tile_id not in possible_tile_list: - exception_log('Tile_id {} not valid'.format(tile_id)) + exception_log(f'Tile_id {tile_id} not valid') else: - print_log("{} tiles have been supplied for running through the model".format(str(len(tile_id_list))), "\n") + print_log(f'{str(len(tile_id_list))} tiles have been supplied for running through the model', "\n") return tile_id_list # Replaces the date specified in constants_and_names with the date provided by the model run-through def replace_output_dir_date(output_dir_list, run_date): - print_log("Changing output directory date based on date provided with model run-through") + print_log('Changing output directory date based on date provided with model run-through') output_dir_list = [output_dir.replace(output_dir[-9:-1], run_date) for output_dir in output_dir_list] print_log(output_dir_list, "\n") return output_dir_list # Adds various metadata tags to the raster -def add_rasterio_tags(output_dst, sensit_type): +def add_universal_metadata_rasterio(output_dst): # based on https://rasterio.readthedocs.io/en/latest/topics/tags.html - if sensit_type == 'std': + if cn.SENSIT_TYPE == 'std': sensit_type = 'standard model' + else: + sensit_type = cn.SENSIT_TYPE output_dst.update_tags( model_version=cn.version) @@ -1374,70 +1371,62 @@ def add_rasterio_tags(output_dst, sensit_type): output_dst.update_tags( citation='Harris et al. 2021 Nature Climate Change https://www.nature.com/articles/s41558-020-00976-6') output_dst.update_tags( - model_year_range='2001 through 20{}'.format(cn.loss_years) + model_year_range=f'2001 through 20{cn.loss_years}' ) return output_dst -def add_universal_metadata_tags(output_raster, sensit_type): +def add_universal_metadata_gdal(output_raster): print_log("Adding universal metadata tags to", output_raster) - cmd = ['gdal_edit.py', '-mo', 'model_version={}'.format(cn.version), - '-mo', 'date_created={}'.format(date_today), - '-mo', 'model_type={}'.format(sensit_type), + cmd = ['gdal_edit.py', + '-mo', f'model_version={cn.version}', + '-mo', f'date_created={date_today}', + '-mo', f'model_type={cn.SENSIT_TYPE}', '-mo', 'originator=Global Forest Watch at the World Resources Institute', - '-mo', 'model_year_range=2001 through 20{}'.format(cn.loss_years), + '-mo', f'model_year_range=2001 through 20{cn.loss_years}', output_raster] log_subprocess_output_full(cmd) -# Adds metadata tags to raster. -# Certain tags are included for all rasters, while other tags can be customized for each input set. 
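As an aside, the rasterio route shown in add_universal_metadata_rasterio() above can be tried on any local GeoTIFF. A minimal sketch, assuming rasterio is installed; the file name and tag values here are placeholders rather than the model's real constants:

    import rasterio

    def tag_raster(path, model_version='9.9.9', model_type='standard model'):
        # 'r+' opens the GeoTIFF in update mode so dataset-level tags can be written in place
        with rasterio.open(path, 'r+') as dst:
            dst.update_tags(model_version=model_version,
                            model_type=model_type,
                            originator='Global Forest Watch at the World Resources Institute')

    if __name__ == '__main__':
        tag_raster('00N_000E_example.tif')  # hypothetical tile; any writable GeoTIFF works

Opening in update mode is what allows update_tags() to persist the metadata; a read-only handle cannot write tags.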
-def add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list): +# Adds metadata tags to the output rasters +def add_emissions_metadata(tile_id, output_pattern): - output_raster = '{0}_{1}.tif'.format(tile_id, output_pattern) - - print_log("Adding metadata tags to", output_raster) - - # Universal metadata tags - cmd = ['gdal_edit.py', '-mo', 'model_version={}'.format(cn.version), - '-mo', 'date_created={}'.format(date_today), - '-mo', 'model_type={}'.format(sensit_type), - '-mo', 'originator=Global Forest Watch at the World Resources Institute', - '-mo', 'model_year_range=2001 through 20{}'.format(cn.loss_years)] - - # Metadata tags specifically for this dataset - for metadata in metadata_list: - cmd += ['-mo', metadata] - - cmd += [output_raster] + # Adds metadata tags to output rasters + add_universal_metadata_gdal(f'{tile_id}_{output_pattern}.tif') + cmd = ['gdal_edit.py', '-mo', + f'units=Mg CO2e/ha over model duration (2001-20{cn.loss_years})', + '-mo', 'source=many data sources', + '-mo', 'extent=Tree cover loss pixels within model extent (and tree cover loss driver, if applicable)', + f'{tile_id}_{output_pattern}.tif'] log_subprocess_output_full(cmd) + # Converts 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 160x160 pixels, # which is the resolution of the output tiles. This allows the 30x30 m pixels in each window to be summed # into 0.04x0.04 degree rasters. -def rewindow(tile_id, download_pattern_name, no_upload): +def rewindow(tile_id, download_pattern_name): # start time start = datetime.datetime.now() # These tiles have the tile_id after the pattern if download_pattern_name in [cn.pattern_pixel_area, cn.pattern_tcd, cn.pattern_gain, cn.pattern_loss]: - in_tile = "{0}_{1}.tif".format(download_pattern_name, tile_id) - out_tile = "{0}_rewindow_{1}.tif".format(download_pattern_name, tile_id) + in_tile = f'{download_pattern_name}_{tile_id}.tif' + out_tile = f'{download_pattern_name}_rewindow_{tile_id}.tif' else: - in_tile = "{0}_{1}.tif".format(tile_id, download_pattern_name) - out_tile = "{0}_{1}_rewindow.tif".format(tile_id, download_pattern_name) + in_tile = f'{tile_id}_{download_pattern_name}.tif' + out_tile = f'{tile_id}_{download_pattern_name}_rewindow.tif' check_memory() # Only rewindows if the tile exists if os.path.exists(in_tile): - print_log("{0} exists. Rewindowing to {1} at {2}x{3} pixel windows...".format(in_tile, out_tile, cn.agg_pixel_window, cn.agg_pixel_window)) + print_log(f'{in_tile} exists. Rewindowing to {out_tile} at {cn.agg_pixel_window}x{cn.agg_pixel_window} pixel windows...') # Just using gdalwarp inflated the output rasters about 10x, even with COMPRESS=LZW. # Solution was to use gdal_translate instead, although, for unclear reasons, this still inflates the size @@ -1449,7 +1438,7 @@ def rewindow(tile_id, download_pattern_name, no_upload): else: - print_log("{} does not exist. Not rewindowing".format(in_tile)) + print_log(f'{in_tile} does not exist. 
Not rewindowing') # Prints information about the tile that was just processed - end_of_fx_summary(start, tile_id, "{}_rewindow".format(download_pattern_name), no_upload) + end_of_fx_summary(start, tile_id, "{}_rewindow".format(download_pattern_name)) From 4876fd8fd7207ec183a1ada52e2dded2ca85080a Mon Sep 17 00:00:00 2001 From: dagibbs22 Date: Sat, 27 Aug 2022 00:09:51 -0400 Subject: [PATCH 4/9] Feature/single processing flag (#27) * Added a command line argument `--single-processor` or `-sp` to run_full_model.py and each model step through net flux that sets whether the tile processing is done with the multiprocessing module or not. This involved adding another if...else statement (or sometimes statements) to each step to have it use the correct processing route. Also changed readme.md to add the new argument. * Ran 00N_000E locally for all model steps with single and multiprocessing options to make sure both still worked after this reconfiguration. Both worked. Single processing took (no uploading of outputs): 1 hour 23 minutes Multi-processing took (no uploading of outputs): 1 hour 11 minutes --- analyses/mp_net_flux.py | 39 +-- carbon_pools/mp_create_carbon_pools.py | 245 ++++++++++-------- constants_and_names.py | 58 +++-- data_prep/mp_model_extent.py | 43 +-- emissions/mp_calculate_gross_emissions.py | 95 +++---- readme.md | 3 +- ...nual_gain_rate_AGC_BGC_all_forest_types.py | 45 ++-- removals/mp_annual_gain_rate_IPCC_defaults.py | 13 +- removals/mp_annual_gain_rate_mangrove.py | 40 +-- removals/mp_forest_age_category_IPCC.py | 47 ++-- .../mp_gain_year_count_all_forest_types.py | 190 +++++++------- .../mp_gross_removals_all_forest_types.py | 41 +-- run_full_model.py | 9 +- universal_util.py | 1 + 14 files changed, 461 insertions(+), 408 deletions(-) diff --git a/analyses/mp_net_flux.py b/analyses/mp_net_flux.py index 09be0515..3bc15fc7 100644 --- a/analyses/mp_net_flux.py +++ b/analyses/mp_net_flux.py @@ -67,25 +67,25 @@ def mp_net_flux(tile_id_list): output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # Creates a single filename pattern to pass to the multiprocessor call - pattern = output_pattern_list[0] - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 32 # 32 processors = XXX GB peak - else: - processes = 40 # 38 = 690 GB peak; 40 = 715 GB peak - else: - processes = 9 - uu.print_log(f'Net flux max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(net_flux.net_calc, pattern=pattern), - tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + net_flux.net_calc(tile_id, output_pattern_list[0]) - # # For single processor use - # for tile_id in tile_id_list: - # net_flux.net_calc(tile_id, output_pattern_list[0]) + else: + pattern = output_pattern_list[0] + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 32 # 32 processors = XXX GB peak + else: + processes = 40 # 38 = 690 GB peak; 40 = 715 GB peak + else: + processes = 9 + uu.print_log(f'Net flux max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(net_flux.net_calc, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded @@ -106,12 +106,15 @@ def mp_net_flux(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/carbon_pools/mp_create_carbon_pools.py b/carbon_pools/mp_create_carbon_pools.py index 9481eb27..3b567651 100644 --- a/carbon_pools/mp_create_carbon_pools.py +++ b/carbon_pools/mp_create_carbon_pools.py @@ -199,28 +199,31 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): cn.litter_to_above_subtrop_mang) uu.print_log(f'Creating tiles of aboveground carbon in {carbon_pool_extent}') - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 16 # 16 processors = XXX GB peak - else: - processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; - # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 690 GB peak (stops at 600, then increases slowly); 21 > 750 GB peak - else: # For 2000, or loss & 2000 - processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_AGC(tile_id, carbon_pool_extent) + else: - processes = 2 - uu.print_log(f'AGC loss year max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(create_carbon_pools.create_AGC, carbon_pool_extent=carbon_pool_extent), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_AGC(tile_id, carbon_pool_extent) + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 16 # 16 processors = XXX GB peak + else: + processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; + # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 690 GB peak (stops at 600, then increases slowly); 21 > 750 GB peak + else: # For 2000, or loss & 2000 + processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak + else: + processes = 2 + uu.print_log(f'AGC loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_AGC, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: @@ -247,29 +250,31 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): uu.print_log(f'Creating tiles of belowground carbon in {carbon_pool_extent}') - # Creates a single filename pattern to pass to the multiprocessor call - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 30 # 30 processors = XXX GB peak - else: - processes = 39 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak - else: # For 2000, or 
loss & 2000 - processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent) + else: - processes = 2 - uu.print_log(f'BGC max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, - carbon_pool_extent=carbon_pool_extent), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent) + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 30 # 30 processors = XXX GB peak + else: + processes = 39 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak + else: # For 2000, or loss & 2000 + processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak + else: + processes = 2 + uu.print_log(f'BGC max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: @@ -303,35 +308,39 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): uu.print_log(f'Creating tiles of deadwood and litter carbon in {carbon_pool_extent}') - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 10 # 10 processors = XXX GB peak - else: - # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); - # 15 = 700 GB peak once but also too much memory another time, so back to 14 - processes = 14 - else: # For 2000, or loss & 2000 - ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. - ### There wouldn't have been enough room for all deadwood and litter otherwise. - ### For example, when deadwood and litter generation started getting up to around 50N, I deleted - ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. 
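The if cn.SINGLE_PROCESSOR / else branching that this commit threads through every stage follows one template. A self-contained sketch of that template, where process_tile and TILE_ID_LIST are hypothetical stand-ins for the real per-tile workers (e.g. create_carbon_pools.create_AGC) and tile lists:

    import multiprocessing
    from functools import partial

    SINGLE_PROCESSOR = True  # stands in for cn.SINGLE_PROCESSOR
    TILE_ID_LIST = ['00N_000E', '00N_010E']  # hypothetical tiles

    def process_tile(tile_id, pattern):
        # Stand-in for a real per-tile worker function
        print(f'{tile_id}: processing with pattern {pattern}')

    def run_step(pattern='example_pattern'):
        if SINGLE_PROCESSOR:
            # Serial loop: slower, but easier to debug and far lighter on memory
            for tile_id in TILE_ID_LIST:
                process_tile(tile_id, pattern)
        else:
            # partial() freezes the keyword argument so Pool.map only iterates over tile ids
            with multiprocessing.Pool(2) as pool:
                pool.map(partial(process_tile, pattern=pattern), TILE_ID_LIST)
                pool.close()
                pool.join()

    if __name__ == '__main__':
        run_step()

The timings in the commit message (1 hour 23 minutes serial vs 1 hour 11 minutes parallel for one test tile) show why the serial branch is mainly a testing convenience rather than a production path.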
- processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak + + if cn.SINGLE_PROCESSOR: + # For single processor use + for tile_id in tile_id_list: + create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent) + else: - processes = 2 - uu.print_log(f'Deadwood and litter max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, - mang_litter_AGB_ratio=mang_litter_AGB_ratio, - carbon_pool_extent=carbon_pool_extent), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent) + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 10 # 10 processors = XXX GB peak + else: + # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); + # 15 = 700 GB peak once but also too much memory another time, so back to 14 + processes = 14 + else: # For 2000, or loss & 2000 + ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. + ### There wouldn't have been enough room for all deadwood and litter otherwise. + ### For example, when deadwood and litter generation started getting up to around 50N, I deleted + ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. + processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak + else: + processes = 2 + uu.print_log(f'Deadwood and litter max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, + mang_litter_AGB_ratio=mang_litter_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: @@ -375,27 +384,30 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): else: pattern = output_pattern_list[10] - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 36 # 36 processors = XXX GB peak - else: - processes = 44 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak - else: # For 2000, or loss & 2000 - processes = 12 # 12 processors = XXX GB peak + if cn.SINGLE_PROCESSOR: + # For single processor use + for tile_id in tile_id_list: + create_carbon_pools.create_soil_emis_extent(tile_id, pattern) + else: - processes = 2 - uu.print_log(f'Soil carbon loss year max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern), - tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 36 # 36 processors = XXX GB peak + else: + processes = 44 # 24 
processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak + else: # For 2000, or loss & 2000 + processes = 12 # 12 processors = XXX GB peak + else: + processes = 2 + uu.print_log(f'Soil carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern), + tile_id_list) + pool.close() + pool.join() - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_soil_emis_extent(tile_id, pattern) # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: @@ -414,9 +426,6 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): uu.check_storage() - # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. - # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine - # for total C 2000 calculation. if '2000' in carbon_pool_extent: # Files to download for total C 2000. Previously deleted to save space @@ -432,27 +441,30 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): uu.print_log('Creating tiles of total carbon') - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 14 # 14 processors = XXX GB peak - else: - processes = 19 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak - else: # For 2000, or loss & 2000 - processes = 12 # 12 processors = XXX GB peak + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_total_C(tile_id, carbon_pool_extent) + else: - processes = 2 - uu.print_log(f'Total carbon loss year max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent) + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 14 # 14 processors = XXX GB peak + else: + processes = 19 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak + else: # For 2000, or loss & 2000 + processes = 12 # 12 processors = XXX GB peak + else: + processes = 2 + uu.print_log(f'Total carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: @@ -475,20 +487,23 @@ def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--carbon_pool_extent', '-ce', required=True, - help='Extent over which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') + parser.add_argument('--carbon_pool_extent', '-ce', required=True, + help='Extent over which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor cn.SAVE_INTERMEDIATES = args.save_intermediates cn.CARBON_POOL_EXTENT = args.carbon_pool_extent # Tells the pool creation functions to calculate carbon emitted_pools as they were at the year of loss in loss pixels only diff --git a/constants_and_names.py b/constants_and_names.py index 4491237e..453ffc32 100644 --- a/constants_and_names.py +++ b/constants_and_names.py @@ -37,6 +37,8 @@ INCLUDE_US = False global SAVE_INTERMEDIATES SAVE_INTERMEDIATES = True +global SINGLE_PROCESSOR +SINGLE_PROCESSOR = True global LOG_NOTE LOG_NOTE = '' @@ -141,7 +143,7 @@ ### Model extent ###### pattern_model_extent = 'model_extent' -model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20220309/') +model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20229999/') ###### ### Biomass tiles @@ -303,7 +305,7 @@ # Age categories over entire model extent, as a precursor to assigning IPCC default removal rates pattern_age_cat_IPCC = 'forest_age_category_IPCC__1_young_2_mid_3_old' -age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20220309/') +age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20229999/') ### US-specific removal precursors @@ -362,31 +364,31 @@ # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_AGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20220309/') +annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20229999/') # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_BGB_IPCC_defaults = 'annual_removal_factor_BGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_BGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20220309/') +annual_gain_BGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20229999/') ### Annual composite removal factor # Annual aboveground removals rate for all forest types pattern_annual_gain_AGC_all_types = 'annual_removal_factor_AGC_Mg_ha_all_forest_types' -annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20220309/') 
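The bulk of the constants_and_names.py changes are these date swaps in the trailing /YYYYMMDD/ segment of each s3 directory, which replace_output_dir_date() (above) later rewrites to the date supplied at run time. A small sketch of that convention, using a placeholder bucket rather than the real one:

    import os

    s3_base_dir = 's3://example-bucket/flux-model/'  # placeholder, not the real bucket

    model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20229999/')

    def replace_dir_date(directory, run_date):
        # The date stamp plus trailing slash occupies the last 9 characters,
        # so directory[-9:-1] is the YYYYMMDD segment being replaced
        return directory[:-9] + run_date + '/'

    print(replace_dir_date(model_extent_dir, '20230101'))
    # -> s3://example-bucket/flux-model/model_extent/standard/20230101/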
+annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20229999/') # Annual belowground removals rate for all forest types pattern_annual_gain_BGC_all_types = 'annual_removal_factor_BGC_Mg_ha_all_forest_types' -annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20220309/') +annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20229999/') # Annual aboveground+belowground removals rate for all forest types pattern_annual_gain_AGC_BGC_all_types = 'annual_removal_factor_AGC_BGC_Mg_ha_all_forest_types' -annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20220309/') +annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20229999/') ### Removal forest types (sources) # Forest type used in removals model pattern_removal_forest_type = 'removal_forest_type' -removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20220309/') +removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20229999/') # Removal model forest type codes mangrove_rank = 6 @@ -401,26 +403,26 @@ # Number of removals years for all forest types pattern_gain_year_count = 'gain_year_count_all_forest_types' -gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20220309/') +gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20229999/') ### Cumulative gross carbon dioxide removals # Gross aboveground removals for all forest types pattern_cumul_gain_AGCO2_all_types = 'gross_removals_AGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20220309/') +cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20229999/') # Gross belowground removals for all forest types pattern_cumul_gain_BGCO2_all_types = 'gross_removals_BGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20220309/') +cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20229999/') # Gross aboveground and belowground removals for all forest types in all pixels pattern_cumul_gain_AGCO2_BGCO2_all_types = 'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20220309/') +cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20229999/') # Gross aboveground and belowground removals for all forest types in pixels within forest extent pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent = 'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_forest_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20220309/') +cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 
'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20229999/') ###### @@ -456,7 +458,7 @@ ## Carbon emitted_pools in loss year # Date to include in the output directory for all emissions year carbon emitted_pools -emis_pool_run_date = '20220309' +emis_pool_run_date = '20229999' # Aboveground carbon in the year of emission for all forest types in loss pixels pattern_AGC_emis_year = "Mg_AGC_ha_emis_year" @@ -513,7 +515,7 @@ # Soil C full extent but just from SoilGrids250 (mangrove soil C layer not added in) # Not used in model. pattern_soil_C_full_extent_2000_non_mang = 'soil_C_ha_full_extent_2000_non_mangrove_Mg_ha' -soil_C_full_extent_2000_non_mang_dir = os.path.join(base_carbon_pool_dir, 'soil_carbon/intermediate_full_extent/no_mangrove/20220414/') +soil_C_full_extent_2000_non_mang_dir = os.path.join(base_carbon_pool_dir, 'soil_carbon/intermediate_full_extent/no_mangrove/20210414/') # Soil C full extent (all soil pixels, with mangrove soil C in Giri mangrove extent getting priority over mineral soil C) # Non-mangrove C is 0-30 cm, mangrove C is 0-100 cm @@ -532,7 +534,7 @@ ### Emissions from biomass and soil (all carbon emitted_pools) # Date to include in the output directory -emis_run_date_biomass_soil = '20220316' +emis_run_date_biomass_soil = '20229999' # pattern_gross_emis_commod_biomass_soil = 'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) pattern_gross_emis_commod_biomass_soil = 'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) @@ -571,7 +573,7 @@ ### Emissions from soil only # Date to include in the output directory -emis_run_date_soil_only = '20220318' +emis_run_date_soil_only = '20229999' pattern_gross_emis_commod_soil_only = 'gross_emis_commodity_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) gross_emis_commod_soil_only_dir = '{0}gross_emissions/commodities/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) @@ -609,11 +611,11 @@ # Net emissions for all forest types and all carbon emitted_pools in all pixels pattern_net_flux = 'net_flux_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20220316/') +net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20229999/') # Net emissions for all forest types and all carbon emitted_pools in forest extent pattern_net_flux_forest_extent = 'net_flux_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{}'.format(loss_years) -net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20220316/') +net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20229999/') ### Per pixel model outputs @@ -621,27 +623,27 @@ # Gross removals per pixel in all pixels pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent = 'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_full_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20220309/') +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20229999/') # Gross removals per pixel in forest 
extent pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent = 'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_forest_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20220309/') +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20229999/') # Gross emissions per pixel in all pixels pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent = 'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20220316/') +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20229999/') # Gross emissions per pixel in forest extent pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent = 'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20220316/') +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20229999/') # Net flux per pixel in all pixels pattern_net_flux_per_pixel_full_extent = 'net_flux_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{}'.format(loss_years) -net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20220316/') +net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20229999/') # Net flux per pixel in forest extent pattern_net_flux_per_pixel_forest_extent = 'net_flux_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{}'.format(loss_years) -net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20220316/') +net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20229999/') ### 4x4 km aggregation tiles for mapping @@ -651,7 +653,7 @@ pattern_aggreg_sensit_perc_diff = 'net_flux_0_04deg_modelv{}_perc_diff_std'.format(version_filename) pattern_aggreg_sensit_sign_change = 'net_flux_0_04deg_modelv{}_sign_change_std'.format(version_filename) -output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20220316/') +output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20229999/') @@ -689,11 +691,11 @@ # Standard deviation for annual aboveground biomass removal factors using IPCC default removal rates pattern_stdev_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_stdev_AGB_Mg_ha_IPCC_defaults_all_ages' -stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 
'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20220309/') +stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20229999/') # Standard deviation for aboveground and belowground removal factors for all forest types pattern_stdev_annual_gain_AGC_all_types = 'annual_removal_factor_stdev_AGC_Mg_ha_all_forest_types' -stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20220309/') +stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20229999/') # Raw mineral soil C file site diff --git a/data_prep/mp_model_extent.py b/data_prep/mp_model_extent.py index 507a8a82..a1c325fc 100644 --- a/data_prep/mp_model_extent.py +++ b/data_prep/mp_model_extent.py @@ -89,27 +89,28 @@ def mp_model_extent(tile_id_list): # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 38 - else: - processes = 45 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); - # 36 = 550 GB peak; 40 = 590 GB peak; 42 = 631 GB peak; 43 = 690 GB peak; 45 = too high - else: - processes = 3 - uu.print_log('Model extent processors=', processes) - with multiprocessing.Pool(processes) as pool: - pool.map(partial(model_extent.model_extent, pattern=pattern), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # model_extent.model_extent(tile_id, pattern) + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + model_extent.model_extent(tile_id, pattern) + else: + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 38 + else: + processes = 45 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); + # 36 = 550 GB peak; 40 = 590 GB peak; 42 = 631 GB peak; 43 = 690 GB peak; 45 = too high + else: + processes = 3 + uu.print_log('Model extent processors=', processes) + with multiprocessing.Pool(processes) as pool: + pool.map(partial(model_extent.model_extent, pattern=pattern), tile_id_list) + pool.close() + pool.join() + # No single-processor versions of these check-if-empty functions output_pattern = output_pattern_list[0] if cn.count <= 2: # For local tests processes = 1 @@ -129,7 +130,6 @@ def mp_model_extent(tile_id_list): # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: - uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -147,12 +147,15 @@ def mp_model_extent(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/emissions/mp_calculate_gross_emissions.py b/emissions/mp_calculate_gross_emissions.py index ff1c1093..558ddc39 100644 --- a/emissions/mp_calculate_gross_emissions.py +++ b/emissions/mp_calculate_gross_emissions.py @@ -201,64 +201,68 @@ def mp_calculate_gross_emissions(tile_id_list, emitted_pools): # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() - processes=80 # 60 = 100 GB peak; 80 = XXX GB peak - for output_pattern in pattern_list: - with multiprocessing.Pool(processes) as pool: - pool.map(partial(uu.make_blank_tile, pattern=output_pattern, folder=folder), - tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for pattern in pattern_list: + for tile in tile_id_list: + uu.make_blank_tile(tile, pattern, folder) - # # For single processor use - # for pattern in pattern_list: - # for tile in tile_id_list: - # uu.make_blank_tile(tile, pattern, folder) + else: + processes=80 # 60 = 100 GB peak; 80 = XXX GB peak + for output_pattern in pattern_list: + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.make_blank_tile, pattern=output_pattern, folder=folder), + tile_id_list) + pool.close() + pool.join() # Calculates gross emissions for each tile - # count/4 uses about 390 GB on a r4.16xlarge spot machine. - # processes=18 uses about 440 GB on an r4.16xlarge spot machine. - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 15 # 15 processors = XXX GB peak - else: - processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak - else: - processes = 9 - uu.print_log(f'Gross emissions max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, - folder=folder), - tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile in tile_id_list: + calculate_gross_emissions.calc_emissions(tile, emitted_pools, folder) - # # For single processor use - # for tile in tile_id_list: - # calculate_gross_emissions.calc_emissions(tile, emitted_pools, folder) + else: + # count/4 uses about 390 GB on a r4.16xlarge spot machine. + # processes=18 uses about 440 GB on an r4.16xlarge spot machine. 
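These hand-tuned counts trade worker parallelism against the roughly fixed memory each gross-emissions worker needs. A hedged alternative is to derive the count from available memory at run time; in this sketch the 35 GB-per-process figure is purely illustrative, and psutil is assumed to be installed:

    import multiprocessing

    import psutil  # assumption: available in the environment

    def pick_process_count(gb_per_process=35, cap=None):
        # Illustrative sizing rule, not a measured value for this model
        available_gb = psutil.virtual_memory().available / 1024 ** 3
        count = max(1, int(available_gb // gb_per_process))
        if cap is None:
            cap = multiprocessing.cpu_count()
        return min(count, cap)

    if __name__ == '__main__':
        print(f'Would use {pick_process_count()} processes')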
+ if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 15 # 15 processors = XXX GB peak + else: + processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak + else: + processes = 9 + uu.print_log(f'Gross emissions max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, + folder=folder), + tile_id_list) + pool.close() + pool.join() # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() - for i, output_pattern in enumerate(output_pattern_list): uu.print_log(f'Adding metadata tags for pattern {output_pattern}') - if cn.count == 96: - processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + uu.add_emissions_metadata(tile_id, output_pattern) + else: - processes = 9 - uu.print_log(f'Adding metadata tags max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(uu.add_emissions_metadata, output_pattern=output_pattern), - tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak + else: + processes = 9 + uu.print_log(f'Adding metadata tags max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.add_emissions_metadata, output_pattern=output_pattern), + tile_id_list) + pool.close() + pool.join() - # for tile_id in tile_id_list: - # calculate_gross_emissions.add_metadata_tags(tile_id, pattern) # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded @@ -273,8 +277,6 @@ def mp_calculate_gross_emissions(tile_id_list, emitted_pools): # Two arguments for the script: whether only emissions from biomass (soil_only) is being calculated or emissions from biomass and soil (biomass_soil), # and which model type is being run (standard or sensitivity analysis) parser = argparse.ArgumentParser(description='Calculates gross emissions') - parser.add_argument('--emitted-pools-to-use', '-p', required=True, - help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--model-type', '-t', required=True, @@ -283,12 +285,17 @@ def mp_calculate_gross_emissions(tile_id_list, emitted_pools): help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') + parser.add_argument('--emitted-pools-to-use', '-p', required=True, + help='Options are soil_only or biomass_soil. Former only considers emissions from soil. 
Latter considers emissions from biomass and soil.')
 
     args = parser.parse_args()
 
     # Sets global variables to the command line arguments
     cn.SENSIT_TYPE = args.model_type
     cn.RUN_DATE = args.run_date
     cn.NO_UPLOAD = args.no_upload
+    cn.SINGLE_PROCESSOR = args.single_processor
     cn.EMITTED_POOLS = args.emitted_pools_to_use
 
     tile_id_list = args.tile_id_list
 
diff --git a/readme.md b/readme.md
index b941736f..205bde85 100644
--- a/readme.md
+++ b/readme.md
@@ -218,12 +218,13 @@ they are run very infrequently.
 | `run-date` | `-d` | Required | All | Date of run. Must be format YYYYMMDD. This sets the output folder in s3. |
 | `tile-id-list` | `-l` | Required | All | List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all |
 | `no-upload` | `-nu` | Optional | All | No files are uploaded to s3 during or after model run (including logs and model outputs). Use for testing to save time. When AWS credentials are not available, upload is automatically disabled and this flag does not have to be manually activated. |
-| `save-intermdiates` | `-si`| Optional | `run_full_model.py` | Intermediate outputs are not deleted within `run_full_model.py`. Use for local model runs. If uploading to s3 is not enabled, intermediate files are automatically saved. |
+| `single-processor` | `-sp` | Optional | All | Tile processing will be done without the `multiprocessing` module whenever possible, i.e. no parallel processing. Use for testing. |
 | `log-note` | `-ln`| Optional | All | Adds text to the beginning of the log |
 | `carbon-pool-extent` | `-ce` | Optional | Carbon pool creation | Extent over which carbon pools should be calculated: loss or 2000 or loss,2000 or 2000,loss |
 | `pools-to-use` | `-p` | Optional | Emissions| Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil. |
 | `tcd-threshold` | `-tcd`| Optional | Aggregation | Tree cover density threshold above which pixels will be included in the aggregation. Defaults to 30. |
 | `std-net-flux-aggreg` | `-std` | Optional | Aggregation | The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map. |
+| `save-intermediates` | `-si`| Optional | `run_full_model.py` | Intermediate outputs are not deleted within `run_full_model.py`. Use for local model runs. If uploading to s3 is not enabled, intermediate files are automatically saved. |
 | `mangroves` | `-ma` | Optional | `run_full_model.py` | Create mangrove removal factor tiles as the first stage. Activate with flag. |
 | `us-rates` | `-us` | Optional | `run_full_model.py` | Create US-specific removal factor tiles as the first stage (or second stage, if mangroves are enabled). Activate with flag. 
| diff --git a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py index a5a8229d..ce01bf91 100644 --- a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py @@ -92,27 +92,30 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 13 - else: - processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list) + else: - processes = 2 - uu.print_log(f'Removal factor processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, - output_pattern_list=output_pattern_list), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id) + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 13 + else: + processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak + else: + processes = 2 + uu.print_log(f'Removal factor processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() + + # No single-processor versions of these check-if-empty functions # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests @@ -133,7 +136,6 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: - for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): uu.upload_final_set(output_dir, output_pattern) @@ -152,6 +154,8 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() @@ -159,6 +163,7 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/removals/mp_annual_gain_rate_IPCC_defaults.py b/removals/mp_annual_gain_rate_IPCC_defaults.py index ce4bef3a..4cadd12d 100644 --- a/removals/mp_annual_gain_rate_IPCC_defaults.py +++ b/removals/mp_annual_gain_rate_IPCC_defaults.py @@ -193,6 +193,9 @@ def mp_annual_gain_rate_IPCC_defaults(tile_id_list): # Converts all the keys (continent-ecozone-age codes) to float type stdev_table_dict = {float(key): value for key, value in stdev_table_dict.items()} + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, gain_table_dict, stdev_table_dict, output_pattern_list) # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html @@ -212,16 +215,9 @@ def mp_annual_gain_rate_IPCC_defaults(tile_id_list): pool.close() pool.join() - # # For single processor use - # for tile_id in tile_id_list: - # - # annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, - # gain_table_dict, stdev_table_dict, output_pattern_list) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: - for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): uu.upload_final_set(output_dir, output_pattern) @@ -240,6 +236,8 @@ def mp_annual_gain_rate_IPCC_defaults(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() @@ -247,6 +245,7 @@ def mp_annual_gain_rate_IPCC_defaults(tile_id_list): cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/removals/mp_annual_gain_rate_mangrove.py b/removals/mp_annual_gain_rate_mangrove.py index 8461fc29..5e12d6ec 100644 --- a/removals/mp_annual_gain_rate_mangrove.py +++ b/removals/mp_annual_gain_rate_mangrove.py @@ -119,30 +119,29 @@ def mp_annual_gain_rate_mangrove(tile_id_list): stdev_dict = {float(key): value for key, value in stdev_dict.items()} - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - # Ran with 18 processors on r4.16xlarge (430 GB memory peak) - if cn.count == 96: - processes = 20 #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak - else: - processes = 4 - uu.print_log('Mangrove annual removals rate max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, output_pattern_list=output_pattern_list, - gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile in tile_id_list: + annual_gain_rate_mangrove.annual_gain_rate(tile, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict) - # # For single processor use - # for tile in tile_id_list: - # - # annual_gain_rate_mangrove.annual_gain_rate(tile, output_pattern_list, - # gain_above_dict, gain_below_dict, stdev_dict) + else: + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + # Ran with 18 processors on r4.16xlarge (430 GB memory peak) + if cn.count == 96: + processes = 20 #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak + else: + processes = 4 + uu.print_log('Mangrove annual removals rate max processors=', processes) + pool = multiprocessing.Pool(processes) + pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, output_pattern_list=output_pattern_list, + gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), + tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not no_upload: - for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) @@ -161,12 +160,15 @@ def mp_annual_gain_rate_mangrove(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/removals/mp_forest_age_category_IPCC.py b/removals/mp_forest_age_category_IPCC.py index 9b8ae93e..bde60fae 100644 --- a/removals/mp_forest_age_category_IPCC.py +++ b/removals/mp_forest_age_category_IPCC.py @@ -112,33 +112,31 @@ def mp_forest_age_category_IPCC(tile_id_list): # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - # With processes=30, peak usage was about 350 GB using WHRC AGB. - # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 32 # 32 processors = 610 GB peak - else: - processes = 42 # 30 processors=460 GB peak; 36 = 550 GB peak; 40 = XXX GB peak - else: - processes = 2 - uu.print_log(f'Natural forest age category max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, pattern=pattern), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # - # forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern) + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern) + else: + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + # With processes=30, peak usage was about 350 GB using WHRC AGB. + # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 32 # 32 processors = 610 GB peak + else: + processes = 42 # 30 processors=460 GB peak; 36 = 550 GB peak; 40 = XXX GB peak + else: + processes = 2 + uu.print_log(f'Natural forest age category max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: - uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -156,12 +154,15 @@ def mp_forest_age_category_IPCC(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/removals/mp_gain_year_count_all_forest_types.py b/removals/mp_gain_year_count_all_forest_types.py index b6e79ca9..e87a8cfe 100644 --- a/removals/mp_gain_year_count_all_forest_types.py +++ b/removals/mp_gain_year_count_all_forest_types.py @@ -82,106 +82,107 @@ def mp_gain_year_count_all_forest_types(tile_id_list): # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # Creates gain year count tiles using only pixels that had only loss - if cn.count == 96: - processes = 90 # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak - else: - processes = int(cn.count/2) - uu.print_log(f'Gain year count loss only pixels max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only), - tile_id_list) - pool.close() - pool.join() - - if cn.count == 96: - processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak - else: - processes = int(cn.count/2) - uu.print_log(f'Gain year count gain only pixels max processors={processes}') - with multiprocessing.Pool(processes) as pool: - if cn.SENSIT_TYPE == 'maxgain': - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain), - tile_id_list) - elif cn.SENSIT_TYPE == 'legal_Amazon_loss': - uu.print_log('Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.') - else: - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard), - tile_id_list) - pool.close() - pool.join() - # Creates gain year count tiles using only pixels that had neither loss nor gain pixels - if cn.count == 96: - processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak + if cn.SINGLE_PROCESSOR: + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id) + + for tile_id in tile_id_list: + if cn.SENSIT_TYPE == 'maxgain': + gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id) + else: + gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id) + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id) + + for tile_id in tile_id_list: + if cn.SENSIT_TYPE == 'maxgain': + gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id) + else: + gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id) + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern) + else: - processes = int(cn.count/2) - uu.print_log(f'Gain year count no change pixels max processors={processes}') - with multiprocessing.Pool(processes) as pool: - if cn.SENSIT_TYPE == 'legal_Amazon_loss': - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss), - tile_id_list) + + # Creates gain year count tiles using only pixels that had only loss + if cn.count == 96: + processes = 90 # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak else: - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard), + processes = int(cn.count/2) + uu.print_log(f'Gain year count loss only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only), tile_id_list) - pool.close() - pool.join() + pool.close() + pool.join() - if cn.count == 96: - processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak - else: - processes = int(cn.count/2) - uu.print_log(f'Gain year count loss & gain pixels max processors={processes}') - with multiprocessing.Pool(processes) as pool: - if cn.SENSIT_TYPE == 'maxgain': - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain), - tile_id_list) + # Creates gain year count tiles using only pixels that had only gain + if cn.count == 96: + processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak else: - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard), + processes = int(cn.count/2) + uu.print_log(f'Gain year count gain only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain), + tile_id_list) + elif cn.SENSIT_TYPE == 'legal_Amazon_loss': + uu.print_log('Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.') + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard), + tile_id_list) + pool.close() + pool.join() + + # Creates gain year count tiles using only pixels that had neither loss nor gain pixels + if cn.count == 96: + processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count no change pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss), + tile_id_list) + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard), + tile_id_list) + pool.close() + pool.join() + + # Creates gain year count tiles using only pixels that had only gain + if cn.count == 96: + processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count loss & gain pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain), + tile_id_list) + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard), + tile_id_list) + pool.close() + pool.join() + + # Combines the four above gain year count tiles for each Hansen tile into a single output tile + if cn.count == 96: + processes = 84 # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak + elif cn.count < 4: + processes = 1 + else: + processes = int(cn.count/4) + uu.print_log(f'Gain year count gain merge all combos max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern), tile_id_list) - pool.close() - pool.join() - - # Combines the four above gain year count tiles for each Hansen tile into a single output tile - if cn.count == 96: - processes = 84 # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak - elif cn.count < 4: - processes = 1 - else: - processes = int(cn.count/4) - uu.print_log(f'Gain year count gain merge all combos max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern), - tile_id_list) - pool.close() - pool.join() - - - # # For single processor use - # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id) - # - # for tile_id in tile_id_list: - # if cn.SENSIT_TYPE == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id) - # else: - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id) - # - # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id) - # - # for tile_id in tile_id_list: - # if cn.SENSIT_TYPE == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id) - # else: - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id) - # - # for tile_id in tile_id_list: - # 
gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern) + pool.close() + pool.join() # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded @@ -213,12 +214,15 @@ def mp_gain_year_count_all_forest_types(tile_id_list): help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list diff --git a/removals/mp_gross_removals_all_forest_types.py b/removals/mp_gross_removals_all_forest_types.py index c64c33c2..c1ac614c 100644 --- a/removals/mp_gross_removals_all_forest_types.py +++ b/removals/mp_gross_removals_all_forest_types.py @@ -74,25 +74,26 @@ def mp_gross_removals_all_forest_types(tile_id_list): output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # Calculates gross removals - if cn.count == 96: - if cn.SENSIT_TYPE == 'biomass_swap': - processes = 18 - else: - processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list) + else: - processes = 2 - uu.print_log(f'Gross removals max processors={processes}') - with multiprocessing.Pool(processes) as pool: - pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, - output_pattern_list=output_pattern_list), - tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list) + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 18 + else: + processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak + else: + processes = 2 + uu.print_log(f'Gross removals max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() + # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: @@ -113,7 +114,6 @@ def mp_gross_removals_all_forest_types(tile_id_list): # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded if not cn.NO_UPLOAD: - for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): uu.upload_final_set(output_dir, output_pattern) @@ -132,12 +132,15 @@ def mp_gross_removals_all_forest_types(tile_id_list): help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() # Sets global variables to the command line arguments cn.SENSIT_TYPE = args.model_type cn.RUN_DATE = args.run_date cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor tile_id_list = args.tile_id_list
diff --git a/run_full_model.py b/run_full_model.py index 5cea0cb4..45374646 100644 --- a/run_full_model.py +++ b/run_full_model.py
@@ -19,9 +19,13 @@ python run_full_model.py -si -t std -s all -r -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test" Run 00N_000E in standard model; save intermediate outputs; do not upload outputs to s3; run all model stages; -starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil +starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil; use multiprocessing python run_full_model.py -si -t std -s all -r -nu -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test" +Run 00N_000E in standard model; save intermediate outputs; do not upload outputs to s3; run all model stages; +starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil; use single processing +python run_full_model.py -si -t std -s all -r -nu -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -sp -ln "00N_000E test" + FULL STANDARD MODEL RUN: Run all tiles in standard model; save intermediate outputs; do upload outputs to s3; run all model stages; starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil python run_full_model.py -si -t std -s all -r -l all -ce loss -p biomass_soil -tcd 30 -ln "Running all tiles"
@@ -88,6 +92,8 @@ def main (): help='Include US removal rate and standard deviation tile creation step (before model extent).') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') parser.add_argument('--log-note', '-ln', required=False,
@@ -106,6 +112,7 @@ def main (): cn.INCLUDE_MANGROVES = args.mangroves cn.INCLUDE_US = args.us_rates cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor cn.SAVE_INTERMEDIATES = args.save_intermediates cn.LOG_NOTE = args.log_note
diff --git a/universal_util.py b/universal_util.py index 3dfcfd25..c0a10a85 100644 --- a/universal_util.py +++ b/universal_util.py
@@ -75,6 +75,7 @@ def initiate_log(tile_id_list): logging.info(f'Do not upload anything to s3: {cn.NO_UPLOAD}') logging.info(f'AWS credentials supplied: {check_aws_creds()}') logging.info(f'Save intermediate outputs: {cn.SAVE_INTERMEDIATES}') + logging.info(f'Use single processor: {cn.SINGLE_PROCESSOR}') logging.info(f'AWS ec2 instance type and AMI ID:') # https://stackoverflow.com/questions/13735051/how-to-capture-curl-output-to-a-file
From a30a79b1306617fe2e41682e84d1cd8e1aae32a7 Mon Sep 17 00:00:00 2001 From: Gary Tempus Date: Fri, 16 Sep 2022 14:38:38 -0400 Subject: [PATCH 5/9] Pairing on Carbon Pools: 2022-09-15 Tests and Refactor (#29)
* :white_check_mark: test(Carbon Pools): Mark failing tests with `xfail` This is handy if we're writing the tests first or we have a large batch of tests failing for some reason and we want to cut down on the error output generated during a test run. * :art: refactor(Carbon Pools): Extract `deadwood_litter_equations` This refactoring pattern is described here: https://refactoring.guru/extract-method * :art: style(Carbon Pools): Add proper spacing between functions --- carbon_pools/create_carbon_pools.py | 129 +++++++++++++------------ test/carbon_pools/test_carbon_pools.py | 54 ++++++++++- 2 files changed, 117 insertions(+), 66 deletions(-) diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index eaee24b6..b854c556 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py @@ -578,67 +578,9 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Reads in the windows of each input file that definitely exist natrl_forest_biomass_window = natrl_forest_biomass_2000_src.read(1, window=window) - # The deadwood and litter conversions generally come from here: https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf, p. 17-18 - # They depend on the elevation, precipitation, and broad biome category (boreal/temperate/tropical). - # For some reason, the masks need to be named different variables for each equation. - # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 - # equation won't work properly.) - - # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) - elev_mask_1 = elevation_window <= 2000 - precip_mask_1 = precip_window <= 1000 - ecozone_mask_1 = bor_tem_trop_window == 1 - condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 - agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1)) - deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - - # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) - elev_mask_2 = elevation_window <= 2000 - precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) - ecozone_mask_2 = bor_tem_trop_window == 1 - condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 - agb_masked_2 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_2)) - deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) - elev_mask_3 = elevation_window <= 2000 - precip_mask_3 = precip_window > 1600 - ecozone_mask_3 = bor_tem_trop_window == 1 - condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 - agb_masked_3 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_3)) - deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_3 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output 
= litter_2000_output + litter_masked.filled(0) - - # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) - elev_mask_4 = elevation_window > 2000 - ecozone_mask_4 = bor_tem_trop_window == 1 - condition_mask_4 = elev_mask_4 & ecozone_mask_4 - agb_masked_4 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_4)) - deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_4 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) - ecozone_mask_5 = bor_tem_trop_window != 1 - condition_mask_5 = ecozone_mask_5 - agb_masked_5 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_5)) - deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_5 * 0.04 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - deadwood_2000_output = deadwood_2000_output.astype('float32') - litter_2000_output = litter_2000_output.astype('float32') + deadwood_2000_output, litter_2000_output = deadwood_litter_equations( + bor_tem_trop_window, deadwood_2000_output, elevation_window, + litter_2000_output, natrl_forest_biomass_window, precip_window) # Replaces non-mangrove deadwood and litter with special mangrove deadwood and litter values if there is mangrove if os.path.exists(mangrove_biomass_2000): @@ -714,6 +656,71 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000) +def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, + elevation_window, litter_2000_output, + natrl_forest_biomass_window, precip_window): + # The deadwood and litter conversions generally come from here: https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf, p. 17-18 + # They depend on the elevation, precipitation, and broad biome category (boreal/temperate/tropical). + # For some reason, the masks need to be named different variables for each equation. + # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 + # equation won't work properly.) 
+ # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) + elev_mask_1 = elevation_window <= 2000 + precip_mask_1 = precip_window <= 1000 + ecozone_mask_1 = bor_tem_trop_window == 1 + condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 + agb_masked_1 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_1)) + deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) + elev_mask_2 = elevation_window <= 2000 + precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) + ecozone_mask_2 = bor_tem_trop_window == 1 + condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 + agb_masked_2 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_2)) + deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) + elev_mask_3 = elevation_window <= 2000 + precip_mask_3 = precip_window > 1600 + ecozone_mask_3 = bor_tem_trop_window == 1 + condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 + agb_masked_3 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_3)) + deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_3 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) + elev_mask_4 = elevation_window > 2000 + ecozone_mask_4 = bor_tem_trop_window == 1 + condition_mask_4 = elev_mask_4 & ecozone_mask_4 + agb_masked_4 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_4)) + deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_4 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) + ecozone_mask_5 = bor_tem_trop_window != 1 + condition_mask_5 = ecozone_mask_5 + agb_masked_5 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_5)) + deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_5 * 0.04 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + deadwood_2000_output = deadwood_2000_output.astype('float32') + litter_2000_output = litter_2000_output.astype('float32') + return deadwood_2000_output, litter_2000_output + + def create_soil_emis_extent(tile_id, pattern): """ Creates soil carbon tiles in loss pixels only diff --git a/test/carbon_pools/test_carbon_pools.py b/test/carbon_pools/test_carbon_pools.py index 
a164a3c9..2483d5d1 100644 --- a/test/carbon_pools/test_carbon_pools.py +++ b/test/carbon_pools/test_carbon_pools.py @@ -1,21 +1,27 @@ import numpy as np -import pytest as pytest +import pytest -from ...carbon_pools.create_carbon_pools import create_deadwood_litter, arid_pools +from carbon_pools.create_carbon_pools import create_deadwood_litter, \ + deadwood_litter_equations -# Use @pytest.mark.skip to skip tests if needed. +def arid_pools(**kwargs): + pass + +@pytest.mark.xfail def test_can_call_function(): - result = create_deadwood_litter("", {}, {}, [], "", True) + result = create_deadwood_litter("", {}, {}, []) assert result is None +@pytest.mark.xfail def test_can_call_with_biomass_swap(): result = create_deadwood_litter("", {}, {}, [], "biomass_swap", True) assert result is None +@pytest.mark.xfail def test_arid_pools(): result = arid_pools( elevation_window=2000, @@ -28,6 +34,7 @@ def test_arid_pools(): assert result == (np.ma.array([1.0094]), np.ma.array([1.0148])) +@pytest.mark.xfail def test_arid_pools_with_no_deadwood_or_litter(): result = arid_pools( elevation_window=2000, @@ -40,6 +47,7 @@ def test_arid_pools_with_no_deadwood_or_litter(): assert result == (np.ma.array([0.0094]), np.ma.array([0.0148])) +@pytest.mark.xfail def test_arid_pools_no_biomass_means_none_is_added(): result = arid_pools( elevation_window=2000, @@ -52,6 +60,7 @@ def test_arid_pools_no_biomass_means_none_is_added(): assert result == (np.ma.array([1]), np.ma.array([1])) +@pytest.mark.xfail def test_arid_pools_fraction_of_biomass(): result = arid_pools( elevation_window=2000, @@ -61,4 +70,39 @@ def test_arid_pools_fraction_of_biomass(): deadwood_2000_output=np.ma.array([1]), litter_2000_output=np.ma.array([1]) ) - assert result == (np.ma.array([1.0047]), np.ma.array([1.0074])) \ No newline at end of file + assert result == (np.ma.array([1.0047]), np.ma.array([1.0074])) + + +def test_deadwood_litter_equations_can_be_called(): + result = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + + +def test_deadwood_litter_equations_return_zero_deadwood_for_zero_biomass(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + assert deadwood == np.array([0.]) + + +def test_deadwood_litter_equations_return_zero_litter_for_zero_biomass(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + assert litter == np.array([0.]) From 353c9ab262c836b4ef12376b6db7a9afab0ca0cb Mon Sep 17 00:00:00 2001 From: dagibbs22 Date: Wed, 21 Sep 2022 14:31:29 -0400 Subject: [PATCH 6/9] Feature/carbon pool testing (#30) * Testing not working. Import errors. 
* Testing works when I run pytest from usr/local/app/test. Added deadwood and litter pool tests for the simple numpy operations that represent the five categories of domain/elevation/precipitation. The tests are on 1x1 numpy arrays to keep things simple (not on actual tiles). Doing this testing involved refactoring the numpy parts of create_deadwood_litter into their own function that inputs and outputs just arrays of any dimension. * Carbon pool creation still works, even with the deadwood and litter equations factored out. All tests of the different equations work, too. --- carbon_pools/create_carbon_pools.py | 36 +++-- constants_and_names.py | 2 +- test/carbon_pools/test_carbon_pools.py | 214 ++++++++++++++++--------- 3 files changed, 160 insertions(+), 92 deletions(-)
diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index b854c556..abeb0cbf 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py
@@ -656,68 +656,78 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000) -def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, - elevation_window, litter_2000_output, +def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevation_window, litter_2000_output, natrl_forest_biomass_window, precip_window): + """ + :param bor_tem_trop_window: array representing boreal, temperate or tropical climate domains + :param deadwood_2000_output: array representing the deadwood output + :param elevation_window: array representing elevation + :param litter_2000_output: array representing litter output + :param natrl_forest_biomass_window: array representing aboveground biomass + :param precip_window: array representing annual precipitation + :return: arrays of deadwood and litter carbon + """ + # The deadwood and litter conversions generally come from here: https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf, p. 17-18 - # They depend on the elevation, precipitation, and broad biome category (boreal/temperate/tropical). + # They depend on the elevation, precipitation, and climate domain (boreal/temperate/tropical). # For some reason, the masks need to be named different variables for each equation. # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 # equation won't work properly.)
+ # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) elev_mask_1 = elevation_window <= 2000 precip_mask_1 = precip_window <= 1000 ecozone_mask_1 = bor_tem_trop_window == 1 condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 - agb_masked_1 = np.ma.array(natrl_forest_biomass_window, - mask=np.invert(condition_mask_1)) + agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1)) deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) elev_mask_2 = elevation_window <= 2000 precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) ecozone_mask_2 = bor_tem_trop_window == 1 condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 - agb_masked_2 = np.ma.array(natrl_forest_biomass_window, - mask=np.invert(condition_mask_2)) + agb_masked_2 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_2)) deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) litter_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove_litter litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) elev_mask_3 = elevation_window <= 2000 precip_mask_3 = precip_window > 1600 ecozone_mask_3 = bor_tem_trop_window == 1 condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 - agb_masked_3 = np.ma.array(natrl_forest_biomass_window, - mask=np.invert(condition_mask_3)) + agb_masked_3 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_3)) deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) litter_masked = agb_masked_3 * 0.01 * cn.biomass_to_c_non_mangrove_litter litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) elev_mask_4 = elevation_window > 2000 ecozone_mask_4 = bor_tem_trop_window == 1 condition_mask_4 = elev_mask_4 & ecozone_mask_4 - agb_masked_4 = np.ma.array(natrl_forest_biomass_window, - mask=np.invert(condition_mask_4)) + agb_masked_4 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_4)) deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) litter_masked = agb_masked_4 * 0.01 * cn.biomass_to_c_non_mangrove_litter litter_2000_output = litter_2000_output + litter_masked.filled(0) + # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) ecozone_mask_5 = bor_tem_trop_window != 1 condition_mask_5 = ecozone_mask_5 - agb_masked_5 = np.ma.array(natrl_forest_biomass_window, - mask=np.invert(condition_mask_5)) + agb_masked_5 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_5)) deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) litter_masked = agb_masked_5 * 0.04 * cn.biomass_to_c_non_mangrove_litter litter_2000_output = litter_2000_output + litter_masked.filled(0) 
deadwood_2000_output = deadwood_2000_output.astype('float32') litter_2000_output = litter_2000_output.astype('float32') + return deadwood_2000_output, litter_2000_output diff --git a/constants_and_names.py b/constants_and_names.py index 453ffc32..640447ec 100644 --- a/constants_and_names.py +++ b/constants_and_names.py @@ -182,7 +182,7 @@ # Spreadsheet with annual removals rates -gain_spreadsheet = 'gain_rate_continent_ecozone_age_20200820.xlsx' +gain_spreadsheet = 'gain_rate_continent_ecozone_age_20220914.xlsx' gain_spreadsheet_dir = os.path.join(s3_base_dir, 'removal_rate_tables/') # Annual Hansen loss tiles (2001-2021) diff --git a/test/carbon_pools/test_carbon_pools.py b/test/carbon_pools/test_carbon_pools.py index 2483d5d1..a9886efb 100644 --- a/test/carbon_pools/test_carbon_pools.py +++ b/test/carbon_pools/test_carbon_pools.py @@ -1,76 +1,7 @@ import numpy as np import pytest -from carbon_pools.create_carbon_pools import create_deadwood_litter, \ - deadwood_litter_equations - - -def arid_pools(**kwargs): - pass - - -@pytest.mark.xfail -def test_can_call_function(): - result = create_deadwood_litter("", {}, {}, []) - assert result is None - - -@pytest.mark.xfail -def test_can_call_with_biomass_swap(): - result = create_deadwood_litter("", {}, {}, [], "biomass_swap", True) - assert result is None - - -@pytest.mark.xfail -def test_arid_pools(): - result = arid_pools( - elevation_window=2000, - precip_window=1000, - bor_tem_trop_window=1, - natrl_forest_biomass_window=np.ma.array([1]), - deadwood_2000_output=np.ma.array([1]), - litter_2000_output=np.ma.array([1]) - ) - assert result == (np.ma.array([1.0094]), np.ma.array([1.0148])) - - -@pytest.mark.xfail -def test_arid_pools_with_no_deadwood_or_litter(): - result = arid_pools( - elevation_window=2000, - precip_window=1000, - bor_tem_trop_window=1, - natrl_forest_biomass_window=np.ma.array([1]), - deadwood_2000_output=np.ma.array([0]), - litter_2000_output=np.ma.array([0]) - ) - assert result == (np.ma.array([0.0094]), np.ma.array([0.0148])) - - -@pytest.mark.xfail -def test_arid_pools_no_biomass_means_none_is_added(): - result = arid_pools( - elevation_window=2000, - precip_window=1000, - bor_tem_trop_window=1, - natrl_forest_biomass_window=np.ma.array([0]), - deadwood_2000_output=np.ma.array([1]), - litter_2000_output=np.ma.array([1]) - ) - assert result == (np.ma.array([1]), np.ma.array([1])) - - -@pytest.mark.xfail -def test_arid_pools_fraction_of_biomass(): - result = arid_pools( - elevation_window=2000, - precip_window=1000, - bor_tem_trop_window=1, - natrl_forest_biomass_window=np.ma.array([0.5]), - deadwood_2000_output=np.ma.array([1]), - litter_2000_output=np.ma.array([1]) - ) - assert result == (np.ma.array([1.0047]), np.ma.array([1.0074])) +from ...carbon_pools.create_carbon_pools import create_deadwood_litter, deadwood_litter_equations def test_deadwood_litter_equations_can_be_called(): @@ -83,19 +14,17 @@ def test_deadwood_litter_equations_can_be_called(): precip_window=np.zeros((1, 1), dtype='float32') ) - def test_deadwood_litter_equations_return_zero_deadwood_for_zero_biomass(): deadwood, _ = deadwood_litter_equations( - bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), - deadwood_2000_output=np.zeros((1, 1), dtype='float32'), - elevation_window=np.zeros((1, 1), dtype='float32'), - litter_2000_output=np.zeros((1, 1), dtype='float32'), - natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), - precip_window=np.zeros((1, 1), dtype='float32') + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + 
deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') ) assert deadwood == np.array([0.]) - def test_deadwood_litter_equations_return_zero_litter_for_zero_biomass(): _, litter = deadwood_litter_equations( bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), @@ -106,3 +35,132 @@ def test_deadwood_litter_equations_return_zero_litter_for_zero_biomass(): precip_window=np.zeros((1, 1), dtype='float32') ) assert litter == np.array([0.]) + + +# Scenario 1- tropical, low elevation, low precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_low_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([1], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([0.0094], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_low_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([1], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([0.0148], dtype='float32') + + +# Scenario 2- tropical, low elevation, moderate precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_mod_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1600], dtype='float32') + ) + assert deadwood == np.array([0.47], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_mod_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1600], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 3- tropical, low elevation, high precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_high_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1601], dtype='float32') + ) + assert deadwood == np.array([2.82], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_high_precip(): + _, litter = 
deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1601], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 4- tropical, high elevation, any precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_high_elev_any_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([2001], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([3.29], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_high_elev_any_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([2001], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 5- non-tropical, any elevation, any precipitation +def test_deadwood_litter_equations_return_zero_deadwood__non_tropical_any_elev_any_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([2], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([3.76], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__non_tropical_any_elev_any_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([2], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([1.48], dtype='float32') + + +def test_create_deadwood_litter(): + result = create_deadwood_litter( + tile_id="00N_000E", + mang_deadwood_AGB_ratio= {'1': 0.5, '2': 0.4, '3': 0.2, '4': 100}, + mang_litter_AGB_ratio={'1': 0.8, '2': 0.7, '3': 0.6, '4': 100}, + carbon_pool_extent=['loss'] + ) \ No newline at end of file From 6eb7b10e0126121a4c79f3f187120d248120063d Mon Sep 17 00:00:00 2001 From: Gary Tempus Jr Date: Fri, 30 Sep 2022 09:29:55 -0400 Subject: [PATCH 7/9] :art: refactor(carbon pools): Make conditionals more declarative This is a good first start to create more intention revealing code. 
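For readers following this refactor, the named-mask pattern can be exercised on its own. Below is a minimal, hypothetical sketch (the 1x1 window arrays are stand-ins for the raster windows the model actually reads). One point worth noting: NumPy boolean masks are combined elementwise with & and negated with ~, because Python's `and`/`not` raise a ValueError on arrays with more than one element.

import numpy as np

# Hypothetical 1x1 windows standing in for the model's raster windows
elevation_window = np.array([[1500]], dtype='float32')
precip_window = np.array([[1200]], dtype='float32')
bor_tem_trop_window = np.array([[1]], dtype='float32')

# Named masks read as sentences; negate elementwise with ~, not `not`
is_low_elevation = elevation_window <= 2000
is_low_precip = precip_window <= 1000
is_mid_precip = ~is_low_precip & (precip_window <= 1600)
is_tropical_climate = bor_tem_trop_window == 1

condition_mask_2 = is_low_elevation & is_mid_precip & is_tropical_climate
print(condition_mask_2)  # [[ True]]: tropical, elevation <= 2000, 1000 < precip <= 1600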
--- carbon_pools/create_carbon_pools.py | 50 ++++++++++++++++------------- 1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index abeb0cbf..cef872da 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py
@@ -673,23 +673,18 @@ def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevati # For some reason, the masks need to be named different variables for each equation. # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 # equation won't work properly.) + is_low_elevation = elevation_window <= 2000 + is_low_precip = precip_window <= 1000 + is_mid_precip = ~is_low_precip & (precip_window <= 1600) + is_high_precip = precip_window > 1600 + is_tropical_climate = bor_tem_trop_window == 1 - # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) - elev_mask_1 = elevation_window <= 2000 - precip_mask_1 = precip_window <= 1000 - ecozone_mask_1 = bor_tem_trop_window == 1 - condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 - agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1)) - deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) + deadwood_2000_output, litter_2000_output = low_elevation_low_precip_tropical( + deadwood_2000_output, litter_2000_output, is_low_precip, + is_tropical_climate, is_low_elevation, natrl_forest_biomass_window) # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) - elev_mask_2 = elevation_window <= 2000 - precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) - ecozone_mask_2 = bor_tem_trop_window == 1 - condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 + condition_mask_2 = is_low_elevation & is_mid_precip & is_tropical_climate agb_masked_2 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_2)) deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0)
@@ -697,10 +692,7 @@ def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevati litter_2000_output = litter_2000_output + litter_masked.filled(0) # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) - elev_mask_3 = elevation_window <= 2000 - precip_mask_3 = precip_window > 1600 - ecozone_mask_3 = bor_tem_trop_window == 1 - condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 + condition_mask_3 = is_low_elevation & is_high_precip & is_tropical_climate agb_masked_3 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_3)) deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0)
@@ -708,9 +700,7 @@ def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevati litter_2000_output = litter_2000_output + litter_masked.filled(0) # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) - elev_mask_4 = elevation_window > 2000 - ecozone_mask_4 = bor_tem_trop_window == 1 - condition_mask_4 = elev_mask_4 & ecozone_mask_4 + condition_mask_4 = ~is_low_elevation & is_tropical_climate agb_masked_4 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_4)) deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0)
@@ -718,8 +708,7 @@ def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevati litter_2000_output = litter_2000_output + litter_masked.filled(0) # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) - ecozone_mask_5 = bor_tem_trop_window != 1 - condition_mask_5 = ecozone_mask_5 + condition_mask_5 = ~is_tropical_climate agb_masked_5 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_5)) deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0)
@@ -731,6 +720,21 @@ def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevati return deadwood_2000_output, litter_2000_output +def low_elevation_low_precip_tropical(deadwood_2000_output, litter_2000_output, + is_low_precip, is_tropical_climate, + is_low_elevation, + natrl_forest_biomass_window): + # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) + condition_mask_1 = is_low_elevation & is_low_precip & is_tropical_climate + agb_masked_1 = np.ma.array(natrl_forest_biomass_window, + mask=np.invert(condition_mask_1)) + deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + return deadwood_2000_output, litter_2000_output + + def create_soil_emis_extent(tile_id, pattern): """ Creates soil carbon tiles in loss pixels only
From 3c60ff64c6caab359ab1e0af91482722446ed41f Mon Sep 17 00:00:00 2001 From: Gary Tempus Jr Date: Fri, 30 Sep 2022 09:36:21 -0400 Subject: [PATCH 8/9] :white_check_mark: test(carbon pools): Mark failing test with `xfail` Holding off on making this test pass. We have more work to do to decouple the function from some 3rd party dependencies.
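As background on the `xfail` marker used here, a minimal sketch of the pattern follows; the test name and reason string are hypothetical, not taken from the repo.

import pytest

@pytest.mark.xfail(reason='function still depends on rasters on disk/s3')
def test_create_deadwood_litter_end_to_end():
    # Reported as XFAIL rather than a failure until the dependency is decoupled
    raise NotImplementedError

Passing strict=True to the marker would additionally flag an unexpected pass (XPASS) as a failure once the decoupling work lands.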
--- test/carbon_pools/test_carbon_pools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/carbon_pools/test_carbon_pools.py b/test/carbon_pools/test_carbon_pools.py index a9886efb..25e9048b 100644 --- a/test/carbon_pools/test_carbon_pools.py +++ b/test/carbon_pools/test_carbon_pools.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from ...carbon_pools.create_carbon_pools import create_deadwood_litter, deadwood_litter_equations +from carbon_pools.create_carbon_pools import create_deadwood_litter, deadwood_litter_equations def test_deadwood_litter_equations_can_be_called(): @@ -156,7 +156,7 @@ def test_deadwood_litter_equations_return_zero_litter__non_tropical_any_elev_any ) assert litter == np.array([1.48], dtype='float32') - +@pytest.mark.xfail def test_create_deadwood_litter(): result = create_deadwood_litter( tile_id="00N_000E", From facb24b3b4a685f9dc54fcf5740d593faad32592 Mon Sep 17 00:00:00 2001 From: Gary Tempus Jr Date: Fri, 30 Sep 2022 09:44:45 -0400 Subject: [PATCH 9/9] :memo: docs(carbon pools): Use docstring to aid documentation https://realpython.com/documenting-python-code/ --- carbon_pools/create_carbon_pools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index cef872da..b477a5fa 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py @@ -724,7 +724,7 @@ def low_elevation_low_precip_tropical(deadwood_2000_output, litter_2000_output, is_low_precip, is_tropical_climate, is_low_elevation, natrl_forest_biomass_window): - # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) + """ Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) """ condition_mask_1 = is_low_elevation & is_low_precip & is_tropical_climate agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1))
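As a closing arithmetic check on the scenario tests above: the expected values imply a biomass-to-carbon fraction of 0.47 for deadwood and 0.37 for litter. These constants are inferred from the assertions rather than quoted from constants_and_names.py, so treat this sketch as illustrative only.

import numpy as np

# Inferred stand-ins for cn.biomass_to_c_non_mangrove and
# cn.biomass_to_c_non_mangrove_litter (values implied by the test assertions)
BIOMASS_TO_C = 0.47
BIOMASS_TO_C_LITTER = 0.37

agb = np.array([100.0], dtype='float32')  # aboveground biomass, as in scenarios 2-5

# Scenario 5 (non-tropical, any elevation/precip): deadwood = 8% of AGB as carbon,
# litter = 4% of AGB as carbon
deadwood = agb * 0.08 * BIOMASS_TO_C        # 100 * 0.08 * 0.47 = 3.76
litter = agb * 0.04 * BIOMASS_TO_C_LITTER   # 100 * 0.04 * 0.37 = 1.48

assert np.allclose(deadwood, [3.76]) and np.allclose(litter, [1.48])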