diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..ab782eb3 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,5 @@ +# .pylintrc + +[MASTER] + +disable=line-too-long, redefined-outer-name, invalid-name \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 3c6542bb..2032cce8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,6 @@ -# Use osgeo GDAL image. It builds off Ubuntu 18.04 and uses GDAL 3.0.4 -FROM osgeo/gdal:ubuntu-small-3.0.4 - -# # Use this if downloading hdf files for burn year analysis -# FROM osgeo/gdal:ubuntu-full-3.0.4 +# Use osgeo GDAL image. +# Ubuntu 20.04.4 LTS, Python 3.8.10, GDAL 3.4.2 +FROM osgeo/gdal:ubuntu-small-3.4.2 ENV DIR=/usr/local/app ENV TMP=/usr/local/tmp @@ -14,16 +12,17 @@ ENV SECRETS_PATH /usr/secrets RUN ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime # Install dependencies +# PostGIS extension version based on https://computingforgeeks.com/how-to-install-postgis-on-ubuntu-linux/ RUN apt-get update -y && apt-get install -y \ make \ automake \ g++ \ gcc \ libpq-dev \ - postgresql-10 \ - postgresql-server-dev-10 \ - postgresql-contrib-10 \ - postgresql-10-postgis-2.4 \ + postgresql-12 \ + postgresql-server-dev-12 \ + postgresql-contrib-12 \ + postgresql-12-postgis-3 \ python3-pip \ wget \ nano \ @@ -57,7 +56,7 @@ ENV PGDATABASE=ubuntu # Commented out the start/restart commands because even with running them, postgres isn't running when the container is created. # So there's no point in starting postgres here if it's not active when the instance opens. ####################################### -RUN cp pg_hba.conf /etc/postgresql/10/main/ +RUN cp pg_hba.conf /etc/postgresql/12/main/ # RUN pg_ctlcluster 10 main start # RUN service postgresql restart @@ -68,9 +67,9 @@ RUN pip3 install -r requirements.txt # Link gdal libraries RUN cd /usr/include && ln -s ./ gdal -# Somehow, this makes gdal_calc.py accessible from anywhere in the Docker -#https://www.continualintegration.com/miscellaneous-articles/all/how-do-you-troubleshoot-usr-bin-env-python-no-such-file-or-directory/ -RUN ln -s /usr/bin/python3 /usr/bin/python +# # Somehow, this makes gdal_calc.py accessible from anywhere in the Docker +# #https://www.continualintegration.com/miscellaneous-articles/all/how-do-you-troubleshoot-usr-bin-env-python-no-such-file-or-directory/ +# RUN ln -s /usr/bin/python3 /usr/bin/python # Enable ec2 to interact with GitHub RUN git config --global user.email dagibbs22@gmail.com @@ -81,11 +80,5 @@ RUN git config --global user.email dagibbs22@gmail.com ## Makes sure the latest version of the current branch is downloaded #RUN git pull origin model_v_1.2.2 -## Compile C++ scripts -#RUN g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal && \ -# g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.exe -lgdal && \ -# g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.exe -lgdal && \ -# g++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.exe -lgdal - # Opens the Docker shell ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/analyses/aggregate_results_to_4_km.py b/analyses/aggregate_results_to_4_km.py deleted file mode 100644 index
a97a4db6..00000000 --- a/analyses/aggregate_results_to_4_km.py +++ /dev/null @@ -1,276 +0,0 @@ -''' -This script creates maps of model outputs at roughly 5km resolution (0.04x0.04 degrees), where each output pixel -represents the total value in the pixel (not the density) (hence, the aggregated results). -This is currently set up for annual removal rate, gross removals, gross emissions, and net flux. -It iterates through all the model outputs that are supplied. -The rewindowed pixel area tiles, tcd, Hansen gain, and mangrove biomass tiles must already be created and in s3 -(created using mp_rewindow_tiles.py). -First, this script rewindows the model output into 160x160 (0.04x0.04 degree) windows, instead of the native -40000x1 pixel windows. -Then it calculates the per pixel value for each model output pixel and sums those values within each 0.04x0.04 degree -aggregated pixel. -It converts emissions, removals, and net flux from totals over the model period to annual values. -For sensitivity analysis runs, it only processes outputs which actually have a sensitivity analysis version. -The user has to supply a tcd threshold for which forest pixels to include in the results. Defaults to cn.canopy_threshold. -For sensitivity analysis, the s3 folder with the aggregations for the standard model must be specified. -sample command: python mp_aggregate_results_to_4_km.py -tcd 30 -t no_shifting_ag -sagg s3://gfw2-data/climate/carbon_model/0_04deg_output_aggregation/biomass_soil/standard/20200901/net_flux_Mt_CO2e_biomass_soil_per_year_tcd30_0_4deg_modelv1_2_0_std_20200901.tif -''' - - -import numpy as np -from subprocess import Popen, PIPE, STDOUT, check_call -import os -import rasterio -from rasterio.transform import from_origin -import datetime -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - -# Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel) -# and sums those values in each 160x160 pixel window. -# The sum for each 160x160 pixel window is stored in a 2D array, which is then converted back into a raster at -# 0.1x0.1 degree resolution (approximately 10m in the tropics). -# Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha). -# The 0.1x0.1 degree tile is output. 
-def aggregate(tile, thresh, sensit_type, no_upload): - - # start time - start = datetime.datetime.now() - - # Extracts the tile id, tile type, and bounding box for the tile - tile_id = uu.get_tile_id(tile) - tile_type = uu.get_tile_type(tile) - xmin, ymin, xmax, ymax = uu.coords(tile_id) - - # Name of inputs - focal_tile_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type) - pixel_area_rewindow = '{0}_{1}.tif'.format(cn.pattern_pixel_area_rewindow, tile_id) - tcd_rewindow = '{0}_{1}.tif'.format(cn.pattern_tcd_rewindow, tile_id) - gain_rewindow = '{0}_{1}.tif'.format(cn.pattern_gain_rewindow, tile_id) - mangrove_rewindow = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000_rewindow) - - # Opens input tiles for rasterio - in_src = rasterio.open(focal_tile_rewindow) - pixel_area_src = rasterio.open(pixel_area_rewindow) - tcd_src = rasterio.open(tcd_rewindow) - gain_src = rasterio.open(gain_rewindow) - - try: - mangrove_src = rasterio.open(mangrove_rewindow) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) - except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) - - uu.print_log(" Converting {} to per-pixel values...".format(tile)) - - # Grabs the windows of the tile (stripes) in order to iterate over the entire tif without running out of memory - windows = in_src.block_windows(1) - - #2D array in which the 0.04x0.04 deg aggregated sums will be stored - sum_array = np.zeros([250,250], 'float32') - - out_raster = "{0}_{1}_0_04deg.tif".format(tile_id, tile_type) - - uu.check_memory() - - # Iterates across the windows (160x160 30m pixels) of the input tile - for idx, window in windows: - - # Creates windows for each input tile - in_window = in_src.read(1, window=window) - pixel_area_window = pixel_area_src.read(1, window=window) - tcd_window = tcd_src.read(1, window=window) - gain_window = gain_src.read(1, window=window) - - try: - mangrove_window = mangrove_src.read(1, window=window) - except: - mangrove_window = np.zeros((window.height, window.width), dtype='uint8') - - # Applies the tree cover density threshold to the 30x30m pixels - if thresh > 0: - - # QCed this line before publication and then again afterwards in response to question from Lena Schulte-Uebbing at Wageningen Uni. 
- in_window = np.where((tcd_window > thresh) | (gain_window == 1) | (mangrove_window != 0), in_window, 0) - - # Calculates the per-pixel value from the input tile value (/ha to /pixel) - per_pixel_value = in_window * pixel_area_window / cn.m2_per_ha - - # Sums the pixels to create a total value for the 0.04x0.04 deg pixel - non_zero_pixel_sum = np.sum(per_pixel_value) - - # Stores the resulting value in the array - sum_array[idx[0], idx[1]] = non_zero_pixel_sum - - - # Converts the annual carbon removals values annual removals in megatonnes and makes negative (because removals are negative) - if cn.pattern_annual_gain_AGC_all_types in tile_type: - sum_array = sum_array / cn.tonnes_to_megatonnes * -1 - - # Converts the cumulative CO2 removals values to annualized CO2 in megatonnes and makes negative (because removals are negative) - if cn.pattern_cumul_gain_AGCO2_BGCO2_all_types in tile_type: - sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes * -1 - - # # Converts the cumulative gross emissions CO2 only values to annualized gross emissions CO2e in megatonnes - # if cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil in tile_type: - # sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes - # - # # Converts the cumulative gross emissions non-CO2 values to annualized gross emissions CO2e in megatonnes - # if cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil in tile_type: - # sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes - - # Converts the cumulative gross emissions all gases CO2e values to annualized gross emissions CO2e in megatonnes - if cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil in tile_type: - sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes - - # Converts the cumulative net flux CO2 values to annualized net flux CO2 in megatonnes - if cn.pattern_net_flux in tile_type: - sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes - - uu.print_log(" Creating aggregated tile for {}...".format(tile)) - - # Converts array to the same output type as the raster that is created below - sum_array = np.float32(sum_array) - - # Creates a tile at 0.04x0.04 degree resolution (approximately 10x10 km in the tropics) where the values are - # from the 2D array created by rasterio above - # https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster - with rasterio.open(out_raster, 'w', - driver='GTiff', compress='DEFLATE', nodata='0', dtype='float32', count=1, - height=250, width=250, - crs='EPSG:4326', transform=from_origin(xmin,ymax,0.04,0.04)) as aggregated: - aggregated.write(sum_array, 1) - ### I don't know why, but update_tags() is adding the tags to the raster but not saving them. - ### That is, the tags are printed but not showing up when I do gdalinfo on the raster. 
- ### Instead, I'm using gdal_edit - # print(aggregated) - # aggregated.update_tags(a="1") - # print(aggregated.tags()) - # uu.add_rasterio_tags(aggregated, sensit_type) - # print(aggregated.tags()) - # if cn.pattern_annual_gain_AGC_all_types in tile_type: - # aggregated.update_tags(units='Mg aboveground carbon/pixel, where pixels are 0.04x0.04 degrees)', - # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # extent='Global', - # treecover_density_threshold='{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # if cn.pattern_cumul_gain_AGCO2_BGCO2_all_types: - # aggregated.update_tags(units='Mg CO2/yr/pixel, where pixels are 0.04x0.04 degrees)', - # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # extent='Global', - # treecover_density_threshold='{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # # if cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil in tile_type: - # # aggregated.update_tags(units='Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees)', - # # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # # extent='Global', gases_included='CO2 only', - # # treecover_density_threshold = '{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # # if cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil in tile_type: - # # aggregated.update_tags(units='Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees)', - # # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # # extent='Global', gases_included='CH4, N20', - # # treecover_density_threshold='{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # if cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil in tile_type: - # aggregated.update_tags(units='Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees)', - # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # extent='Global', - # treecover_density_threshold='{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # if cn.pattern_net_flux in tile_type: - # aggregated.update_tags(units='Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees)', - # scale='Negative values are net sinks. Positive values are net sources.', - # source='per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - # extent='Global', - # treecover_density_threshold='{0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh)) - # print(aggregated.tags()) - # aggregated.close() - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, '{}_0_04deg'.format(tile_type), no_upload) - - -# Calculates the percent difference between the standard model's net flux output -# and the sensitivity model's net flux output -def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): - - # start time - start = datetime.datetime.now() - date = datetime.datetime.now() - date_formatted = date.strftime("%Y_%m_%d") - - uu.print_log(sensit_aggreg_flux) - uu.print_log(std_aggreg_flux) - - # This produces errors about dividing by 0. As far as I can tell, those are fine. 
It's just trying to divide NoData - # pixels by NoData pixels, and it doesn't affect the output. - # For model v1.2.0, this kept producing incorrect values for the biomass_swap analysis. I don't know why. I ended - # up just using raster calculator in ArcMap to create the percent diff raster for biomass_swap. It worked - # fine for all the other analyses, though (including legal_Amazon_loss). - # Maybe that divide by 0 is throwing off other values now. - perc_diff_calc = '--calc=(A-B)/absolute(B)*100' - perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_formatted) - perc_diff_outfilearg = '--outfile={}'.format(perc_diff_outfilename) - # cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg, - # '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] - cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg, - '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] - uu.log_subprocess_output_full(cmd) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload) - - -# Maps where the sources stay sources, sinks stay sinks, sources become sinks, and sinks become sources -def sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): - - # start time - start = datetime.datetime.now() - - # Date for the output raster name - date = datetime.datetime.now() - date_formatted = date.strftime("%Y_%m_%d") - - # Opens the standard net flux output in rasterio - with rasterio.open(std_aggreg_flux) as std_src: - - kwargs = std_src.meta - - windows = std_src.block_windows(1) - - # Opens the sensitivity analysis net flux output in rasterio - sensit_src = rasterio.open(sensit_aggreg_flux) - - # Creates the sign change raster - dst = rasterio.open('{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_sign_change, sensit_type, date_formatted), 'w', **kwargs) - - # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) - dst.update_tags( - key='1=stays net source. 2=stays net sink. 3=changes from net source to net sink. 4=changes from net sink to net source.') - dst.update_tags( - source='Comparison of net flux at 0.04x0.04 degrees from standard model to net flux from {} sensitivity analysis'.format(sensit_type)) - dst.update_tags( - extent='Global') - - # Iterates through the windows in the standard net flux output - for idx, window in windows: - - std_window = std_src.read(1, window=window) - sensit_window = sensit_src.read(1, window=window) - - # Defaults the sign change output raster to 0 - dst_data = np.zeros((window.height, window.width), dtype='Float32') - - # Assigns the output value based on the signs (source, sink) of the standard and sensitivity analysis. - # No option has both windows equaling 0 because that results in the NoData values getting assigned whatever - # output corresponds to that - # (e.g., if dst_data[np.where((sensit_window >= 0) & (std_window >= 0))] = 1, NoData values (0s) would become 1s. 
- dst_data[np.where((sensit_window > 0) & (std_window >= 0))] = 1 # stays net source - dst_data[np.where((sensit_window < 0) & (std_window < 0))] = 2 # stays net sink - dst_data[np.where((sensit_window >= 0) & (std_window < 0))] = 3 # changes from sink to source - dst_data[np.where((sensit_window < 0) & (std_window >= 0))] = 4 # changes from source to sink - - dst.write_band(1, dst_data, window=window) - - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux, no_upload) diff --git a/analyses/create_supplementary_outputs.py b/analyses/create_supplementary_outputs.py deleted file mode 100644 index 244cec63..00000000 --- a/analyses/create_supplementary_outputs.py +++ /dev/null @@ -1,149 +0,0 @@ -''' -Script to create three supplementary tiled outputs for each main model output (gross emissions, gross removals, net flux), -which are already in per hectare values for full model extent: -1. per pixel values for full model extent (all pixels included in model extent) -2. per hectare values for forest extent (within the model extent, pixels that have TCD>30 OR Hansen gain OR mangrove biomass) -3. per pixel values for forest extent -The forest extent outputs are for sharing with partners because they limit the model to just the relevant pixels -(those within forests). -Forest extent is defined in the methods section of Harris et al. 2021 Nature Climate Change. -It is roughly implemented in mp_model_extent.py but using TCD>0 rather thant TCD>30. Here, the TCD>30 requirement -is implemented instead as a subset of the full model extent pixels. -Forest extent is: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations. -The WHRC AGB2000 and pre-2000 plantations conditions were set in mp_model_extent.py, so they don't show up here. -''' - -import numpy as np -from subprocess import Popen, PIPE, STDOUT, check_call -import os -import rasterio -from rasterio.transform import from_origin -import datetime -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - -def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload): - - # start time - start = datetime.datetime.now() - - # Extracts the tile id, tile type, and bounding box for the tile - tile_id = uu.get_tile_id(tile_id) - - # Names of inputs - focal_tile = '{0}_{1}.tif'.format(tile_id, input_pattern) - pixel_area = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id) - tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000) - - # Names of outputs. - # Requires that output patterns be listed in main script in the correct order for here - # (currently, per pixel full extent, per hectare forest extent, per pixel forest extent). 
- per_pixel_full_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[0]) - per_hectare_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[1]) - per_pixel_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[2]) - - # Opens input tiles for rasterio - in_src = rasterio.open(focal_tile) - # Grabs metadata about the tif, like its location/projection/cellsize - kwargs = in_src.meta - # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory - windows = in_src.block_windows(1) - - pixel_area_src = rasterio.open(pixel_area) - tcd_src = rasterio.open(tcd) - gain_src = rasterio.open(gain) - - try: - mangrove_src = rasterio.open(mangrove) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) - except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) - - uu.print_log(" Creating outputs for {}...".format(focal_tile)) - - kwargs.update( - driver='GTiff', - count=1, - compress='DEFLATE', - nodata=0, - dtype='float32' - ) - - # Opens output tiles, giving them the arguments of the input tiles - per_pixel_full_extent_dst = rasterio.open(per_pixel_full_extent, 'w', **kwargs) - per_hectare_forest_extent_dst = rasterio.open(per_hectare_forest_extent, 'w', **kwargs) - per_pixel_forest_extent_dst = rasterio.open(per_pixel_forest_extent, 'w', **kwargs) - - # Adds metadata tags to the output rasters - - uu.add_rasterio_tags(per_pixel_full_extent_dst, sensit_type) - per_pixel_full_extent_dst.update_tags( - units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years)) - per_pixel_full_extent_dst.update_tags( - source='per hectare full model extent tile') - per_pixel_full_extent_dst.update_tags( - extent='Full model extent: ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') - - uu.add_rasterio_tags(per_hectare_forest_extent_dst, sensit_type) - per_hectare_forest_extent_dst.update_tags( - units='Mg CO2e/hectare over model duration (2001-20{})'.format(cn.loss_years)) - per_hectare_forest_extent_dst.update_tags( - source='per hectare full model extent tile') - per_hectare_forest_extent_dst.update_tags( - extent='Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') - - uu.add_rasterio_tags(per_pixel_forest_extent_dst, sensit_type) - per_pixel_forest_extent_dst.update_tags( - units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years)) - per_pixel_forest_extent_dst.update_tags( - source='per hectare forest model extent tile') - per_pixel_forest_extent_dst.update_tags( - extent='Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') - - if "net_flux" in focal_tile: - per_pixel_full_extent_dst.update_tags( - scale='Negative values are net sinks. Positive values are net sources.') - per_hectare_forest_extent_dst.update_tags( - scale='Negative values are net sinks. Positive values are net sources.') - per_pixel_forest_extent_dst.update_tags( - scale='Negative values are net sinks. 
Positive values are net sources.') - - uu.check_memory() - - # Iterates across the windows of the input tiles - for idx, window in windows: - - # Creates windows for each input tile - in_window = in_src.read(1, window=window) - pixel_area_window = pixel_area_src.read(1, window=window) - tcd_window = tcd_src.read(1, window=window) - gain_window = gain_src.read(1, window=window) - - try: - mangrove_window = mangrove_src.read(1, window=window) - except: - mangrove_window = np.zeros((window.height, window.width), dtype='uint8') - - # Output window for per pixel full extent raster - dst_window_per_pixel_full_extent = in_window * pixel_area_window / cn.m2_per_ha - - # Output window for per hectare forest extent raster - # QCed this line before publication and then again afterwards in response to question from Lena Schulte-Uebbing at Wageningen Uni. - dst_window_per_hectare_forest_extent = np.where((tcd_window > cn.canopy_threshold) | (gain_window == 1) | (mangrove_window != 0), in_window, 0) - - # Output window for per pixel forest extent raster - dst_window_per_pixel_forest_extent = dst_window_per_hectare_forest_extent * pixel_area_window / cn.m2_per_ha - - # Writes arrays to output raster - per_pixel_full_extent_dst.write_band(1, dst_window_per_pixel_full_extent, window=window) - per_hectare_forest_extent_dst.write_band(1, dst_window_per_hectare_forest_extent, window=window) - per_pixel_forest_extent_dst.write_band(1, dst_window_per_pixel_forest_extent, window=window) - - uu.print_log(" Output tiles created for {}...".format(tile_id)) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, output_patterns[0], no_upload) \ No newline at end of file diff --git a/analyses/derivative_outputs.py b/analyses/derivative_outputs.py new file mode 100644 index 00000000..ffe0056d --- /dev/null +++ b/analyses/derivative_outputs.py @@ -0,0 +1,314 @@ +""" +Final step of the flux model. This creates various derivative outputs which are used on the GFW platform and for +supplemental analyses. Derivative outputs for gross emissions, gross removals, and net flux at 0.00025x0.00025 deg +resolution for full model extent (all pixels included in mp_model_extent.py): +1. Full extent flux Mg per pixel at 0.00025x0.00025 deg (all pixels included in mp_model_extent.py) +2. Forest extent flux Mg per hectare at 0.00025x0.00025 deg (forest extent defined below) +3. Forest extent flux Mg per pixel at 0.00025x0.00025 deg (forest extent defined below) +4. Forest extent flux Mt at 0.04x0.04 deg (aggregated output, ~ 4x4 km at equator) +For sensitivity analyses only: +5. Percent difference between standard model and sensitivity analysis for aggregated map +6. Pixels with sign changes between standard model and sensitivity analysis for aggregated map + +The forest extent outputs are for sharing with partners because they limit the model to just the relevant pixels +(those within forests, as defined below). +Forest extent is defined in the methods section of Harris et al. 2021 Nature Climate Change: +within the model extent, pixels that have TCD>30 OR Hansen gain OR mangrove biomass. +More formally, forest extent is: +((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations. +The WHRC AGB2000 condition was set in mp_model_extent.py, so it doesn't show up here.
+""" + +import numpy as np +import os +import rasterio +from rasterio.transform import from_origin +import datetime +import sys + +import constants_and_names as cn +import universal_util as uu + + +def forest_extent_per_pixel_outputs(tile_id, input_pattern, output_patterns): + """ + Creates derivative outputs at 0.00025x0.00025 deg resolution + :param tile_id: tile to be processed, identified by its tile id + :param input_pattern: pattern for input tile + :param output_patterns: patterns for output tile names (list of patterns because three derivative outputs) + :return: Three tiles: full extent Mg per pixel, forest extent Mg per hectare, forest extent Mg per pixel + """ + + # start time + start = datetime.datetime.now() + + # Names of inputs + focal_tile = f'{tile_id}_{input_pattern}.tif' + pixel_area = f'{cn.pattern_pixel_area}_{tile_id}.tif' + tcd = f'{cn.pattern_tcd}_{tile_id}.tif' + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' + mangrove = f'{tile_id}_{cn.pattern_mangrove_biomass_2000}.tif' + pre_2000_plantations = f'{tile_id}_{cn.pattern_plant_pre_2000}.tif' + + # Names of outputs. + # Requires that output patterns be listed in main script in the correct order for here + # (currently, per pixel full extent, per hectare forest extent, per pixel forest extent). + per_pixel_full_extent = f'{tile_id}_{output_patterns[0]}.tif' + per_hectare_forest_extent = f'{tile_id}_{output_patterns[1]}.tif' + per_pixel_forest_extent = f'{tile_id}_{output_patterns[2]}.tif' + + # Opens input tiles for rasterio + in_src = rasterio.open(focal_tile) + # Grabs metadata about the tif, like its location/projection/cellsize + kwargs = in_src.meta + # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory + windows = in_src.block_windows(1) + + pixel_area_src = rasterio.open(pixel_area) + tcd_src = rasterio.open(tcd) + + try: + gain_src = rasterio.open(gain) + uu.print_log(f' Gain tile found for {tile_id}') + except: + uu.print_log(f' Gain tile not found for {tile_id}') + + try: + mangrove_src = rasterio.open(mangrove) + uu.print_log(f' Mangrove tile found for {tile_id}') + except: + uu.print_log(f' Mangrove tile not found for {tile_id}') + + try: + pre_2000_plantations_src = rasterio.open(pre_2000_plantations) + uu.print_log(f' Pre-2000 plantation tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Pre-2000 plantation tile not found for {tile_id}') + + uu.print_log(f' Creating outputs for {focal_tile}...') + + kwargs.update( + driver='GTiff', + count=1, + compress='DEFLATE', + nodata=0, + dtype='float32' + ) + + # Opens output tiles, giving them the arguments of the input tiles + per_pixel_full_extent_dst = rasterio.open(per_pixel_full_extent, 'w', **kwargs) + per_hectare_forest_extent_dst = rasterio.open(per_hectare_forest_extent, 'w', **kwargs) + per_pixel_forest_extent_dst = rasterio.open(per_pixel_forest_extent, 'w', **kwargs) + + # Adds metadata tags to the output rasters + uu.add_universal_metadata_rasterio(per_pixel_full_extent_dst) + per_pixel_full_extent_dst.update_tags( + units=f'Mg CO2e/pixel over model duration (2001-20{cn.loss_years})') + per_pixel_full_extent_dst.update_tags( + source='per hectare full model extent tile') + per_pixel_full_extent_dst.update_tags( + extent='Full model extent: ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0)') + + uu.add_universal_metadata_rasterio(per_hectare_forest_extent_dst) + per_hectare_forest_extent_dst.update_tags( + units=f'Mg CO2e/hectare over 
model duration (2001-20{cn.loss_years})') + per_hectare_forest_extent_dst.update_tags( + source='per hectare full model extent tile') + per_hectare_forest_extent_dst.update_tags( + extent='Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') + + uu.add_universal_metadata_rasterio(per_pixel_forest_extent_dst) + per_pixel_forest_extent_dst.update_tags( + units=f'Mg CO2e/pixel over model duration (2001-20{cn.loss_years})') + per_pixel_forest_extent_dst.update_tags( + source='per hectare forest model extent tile') + per_pixel_forest_extent_dst.update_tags( + extent='Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations') + + if "net_flux" in focal_tile: + per_pixel_full_extent_dst.update_tags( + scale='Negative values are net sinks. Positive values are net sources.') + per_hectare_forest_extent_dst.update_tags( + scale='Negative values are net sinks. Positive values are net sources.') + per_pixel_forest_extent_dst.update_tags( + scale='Negative values are net sinks. Positive values are net sources.') + + uu.check_memory() + + # Iterates across the windows of the input tiles + for idx, window in windows: + + # Creates windows for each input tile + in_window = in_src.read(1, window=window) + pixel_area_window = pixel_area_src.read(1, window=window) + tcd_window = tcd_src.read(1, window=window) + + try: + gain_window = gain_src.read(1, window=window) + except: + gain_window = np.zeros((window.height, window.width), dtype='uint8') + + try: + mangrove_window = mangrove_src.read(1, window=window) + except: + mangrove_window = np.zeros((window.height, window.width), dtype='uint8') + + try: + pre_2000_plantations_window = pre_2000_plantations_src.read(1, window=window) + except UnboundLocalError: + pre_2000_plantations_window = np.zeros((window.height, window.width), dtype=int) + + # Output window for per pixel full extent raster + dst_window_per_pixel_full_extent = in_window * pixel_area_window / cn.m2_per_ha + + # Output window for per hectare forest extent raster + # QCed this line before publication and then again afterwards in response to question from Lena Schulte-Uebbing at Wageningen Uni. 
+ dst_window_per_hectare_forest_extent = \ + np.where(((tcd_window > cn.canopy_threshold) | (gain_window == 1) | (mangrove_window != 0)) & (pre_2000_plantations_window == 0), in_window, 0) + + # Output window for per pixel forest extent raster + dst_window_per_pixel_forest_extent = dst_window_per_hectare_forest_extent * pixel_area_window / cn.m2_per_ha + + # Writes arrays to output raster + per_pixel_full_extent_dst.write_band(1, dst_window_per_pixel_full_extent, window=window) + per_hectare_forest_extent_dst.write_band(1, dst_window_per_hectare_forest_extent, window=window) + per_pixel_forest_extent_dst.write_band(1, dst_window_per_pixel_forest_extent, window=window) + + uu.print_log(f' Output tiles created for {tile_id}...') + + # Prints information about the tile that was just processed + uu.end_of_fx_summary(start, tile_id, output_patterns[0]) + + +def aggregate_within_tile(tile_id, download_pattern_name): + """ + Aggregates 0.00025x0.00025 deg per pixel forest extent raster to 0.04x0.04 deg raster + :param tile_id: tile to be processed, identified by its tile id + :param download_pattern_name: pattern for input tile, in this case the forest extent per-pixel version + :return: Raster with values aggregated to Mt per 0.04x0.04 deg cells + """ + + # start time + start = datetime.datetime.now() + + # Name of inputs + focal_tile_rewindowed = f'{tile_id}_{download_pattern_name}_rewindow.tif' + + xmin, ymin, xmax, ymax = uu.coords(focal_tile_rewindowed) + + try: + in_src = rasterio.open(focal_tile_rewindowed) + uu.print_log(f' Tile found for {tile_id}. Rewindowing.') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Tile not found for {tile_id}. Skipping tile.') + return + + # Grabs the windows of the tile (stripes) in order to iterate over the entire tif without running out of memory + windows = in_src.block_windows(1) + + # 2D array (250x250 cells) in which the 0.04x0.04 deg aggregated sums will be stored. + sum_array = np.zeros([int(cn.tile_width/cn.agg_pixel_window),int(cn.tile_width/cn.agg_pixel_window)], 'float32') + + out_raster = f'{tile_id}_{download_pattern_name}_{cn.agg_pixel_res_filename}deg.tif' + + uu.check_memory() + + # Iterates across the windows (160x160 30m pixels) of the input tile + for idx, window in windows: + + # Creates windows for each input tile + in_window = in_src.read(1, window=window) + + # Sums the pixels to create a total value for the 0.04x0.04 deg pixel + non_zero_pixel_sum = np.sum(in_window) + + # Stores the resulting value in the array + sum_array[idx[0], idx[1]] = non_zero_pixel_sum + + + # Converts the cumulative CO2 removals values to annualized CO2 in megatonnes and makes negative (because removals are negative) + # [0:15] limits the pattern to the part of the download_pattern_name shared by the full extent per-hectare version + # and the forest extent per-pixel version. It's hacky. + if cn.pattern_cumul_gain_AGCO2_BGCO2_all_types[0:15] in download_pattern_name: + sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes * -1 + + # Converts the cumulative gross emissions all gases CO2e values to annualized gross emissions CO2e in megatonnes. + # [0:15] limits the pattern to the part of the download_pattern_name shared by the full extent per-hectare version + # and the forest extent per-pixel version. It's hacky. 
+ if cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil[0:15] in download_pattern_name: + sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes + + # Converts the cumulative net flux CO2 values to annualized net flux CO2 in megatonnes. + # [0:15] limits the pattern to the part of the download_pattern_name shared by the full extent per-hectare version + # and the forest extent per-pixel version. It's hacky. + if cn.pattern_net_flux[0:15] in download_pattern_name: + sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes + + uu.print_log(f' Creating aggregated tile for {tile_id}...') + + # Converts array to the same output type as the raster that is created below + sum_array = np.float32(sum_array) + + # Creates a tile at 0.04x0.04 degree resolution (approximately 4x4 km in the tropics) where the values are + # from the 2D array created by rasterio above + # https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster + with rasterio.open(out_raster, 'w', + driver='GTiff', compress='DEFLATE', nodata='0', dtype='float32', count=1, + height=int(cn.tile_width/cn.agg_pixel_window), + width=int(cn.tile_width/cn.agg_pixel_window), + crs='EPSG:4326', + transform=from_origin(xmin,ymax,cn.agg_pixel_res,cn.agg_pixel_res)) as aggregated: + aggregated.write(sum_array, 1) + + # Prints information about the tile that was just processed + uu.end_of_fx_summary(start, tile_id, f'{download_pattern_name}_{cn.agg_pixel_res_filename}deg') + + +def aggregate_tiles(basic_pattern, per_pixel_forest_pattern): + """ + Aggregates all 0.04x0.04 deg resolution 10x10 deg tiles into a global 0.04x0.04 deg map + :param basic_pattern: pattern for per hectare full extent tiles (used as basis for aggregated output file name) + :param per_pixel_forest_pattern: pattern for per pixel forest extent tiles + :return: global aggregated 0.04x0.04 deg map with fluxes of Mt/year/pixel + """ + + # Makes a vrt of all the output 10x10 tiles (0.04 degree resolution) + out_vrt = f'{per_pixel_forest_pattern}_{cn.agg_pixel_res_filename}deg.vrt' + os.system(f'gdalbuildvrt -tr {str(cn.agg_pixel_res)} {str(cn.agg_pixel_res)} {out_vrt} *{per_pixel_forest_pattern}_{cn.agg_pixel_res_filename}deg.tif') + + # Creates the output name for the aggregated map + out_aggregated_pattern = uu.name_aggregated_output(basic_pattern) + uu.print_log(f'Aggregated raster pattern is {out_aggregated_pattern}') + + # Produces a single raster of all the 10x10 tiles + cmd = ['gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=DEFLATE', + '-tr', str(cn.agg_pixel_res), str(cn.agg_pixel_res), + out_vrt, f'{out_aggregated_pattern}.tif'] + uu.log_subprocess_output_full(cmd) + + # Adds metadata tags to output rasters + uu.add_universal_metadata_gdal(f'{out_aggregated_pattern}.tif') + + # Units are different for annual removal factor, so metadata has to reflect that + if 'annual_removal_factor' in out_aggregated_pattern: + cmd = ['gdal_edit.py', + '-mo', f'units=Mg aboveground carbon/yr/pixel, where pixels are {cn.agg_pixel_res}x{cn.agg_pixel_res} degrees', + '-mo', + 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', + '-mo', 'extent=Global', + '-mo', 'scale=negative values are removals', + '-mo', + f'treecover_density_threshold={cn.canopy_threshold} (only model pixels with canopy cover > {cn.canopy_threshold} are included in aggregation', + f'{out_aggregated_pattern}.tif'] + uu.log_subprocess_output_full(cmd) + + else: + cmd
= ['gdal_edit.py', + '-mo', f'units=Mg CO2e/yr/pixel, where pixels are {cn.agg_pixel_res}x{cn.agg_pixel_res} degrees', + '-mo', + 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', + '-mo', 'extent=Global', + '-mo', + f'treecover_density_threshold={cn.canopy_threshold} (only model pixels with canopy cover > {cn.canopy_threshold} are included in aggregation', + f'{out_aggregated_pattern}.tif'] + uu.log_subprocess_output_full(cmd) \ No newline at end of file diff --git a/analyses/download_tile_set.py b/analyses/download_tile_set.py index 9d174d37..b4d5930d 100644 --- a/analyses/download_tile_set.py +++ b/analyses/download_tile_set.py @@ -2,6 +2,9 @@ This script downloads the listed tiles and creates overviews for them for easy viewing in ArcMap. It must be run in the Docker container, and so tiles are downloaded to and overviewed in the folder of the Docker container where all other tiles are downloaded. + +python -m analyses.download_tile_set -t std -l 00N_000E +python -m analyses.download_tile_set -t std -l 00N_000E,00N_110E ''' import multiprocessing @@ -11,28 +14,34 @@ import datetime import argparse import glob -from subprocess import Popen, PIPE, STDOUT, check_call import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -def download_tile_set(sensit_type, tile_id_list): +def download_tile_set(tile_id_list): uu.print_log("Downloading all tiles for: ", tile_id_list) - wd = os.path.join(cn.docker_base_dir,"spot_download") + wd = os.path.join(cn.docker_tile_dir, "spot_download") os.chdir(wd) download_dict = { + cn.gain_dir: [cn.pattern_gain_data_lake], + cn.loss_dir: [cn.pattern_loss], + cn.tcd_dir: [cn.pattern_tcd], + cn.WHRC_biomass_2000_unmasked_dir: [cn.pattern_WHRC_biomass_2000_unmasked], + cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000], + cn.model_extent_dir: [cn.pattern_model_extent], cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], cn.annual_gain_BGB_IPCC_defaults_dir: [cn.pattern_annual_gain_BGB_IPCC_defaults], cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults], cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio], cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types], cn.annual_gain_AGC_BGC_all_types_dir: [cn.pattern_annual_gain_AGC_BGC_all_types], @@ -40,7 +49,6 @@ def download_tile_set(sensit_type, tile_id_list): cn.gain_year_count_dir: [cn.pattern_gain_year_count], cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types], cn.cumul_gain_BGCO2_all_types_dir: [cn.pattern_cumul_gain_BGCO2_all_types], - cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year], cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year], cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000], @@ -60,6 +68,7 @@ def download_tile_set(sensit_type, tile_id_list): cn.gross_emis_non_co2_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil], cn.gross_emis_nodes_biomass_soil_dir: [cn.pattern_gross_emis_nodes_biomass_soil], cn.net_flux_dir: [cn.pattern_net_flux], + cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], 
cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent], cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent], cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent], @@ -75,9 +84,9 @@ def download_tile_set(sensit_type, tile_id_list): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, wd, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, wd, cn.SENSIT_TYPE, tile_id_list) - cmd = ['aws', 's3', 'cp', cn.output_aggreg_dir, wd, '--recursive'] + cmd = ['aws', 's3', 'cp', cn.output_aggreg_dir, wd] uu.log_subprocess_output_full(cmd) tile_list = glob.glob('*tif') @@ -103,21 +112,21 @@ def download_tile_set(sensit_type, tile_id_list): parser = argparse.ArgumentParser( description='Download model outputs for specific tile') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--run-date', '-d', required=False, - help='Date of run. Must be format YYYYMMDD.') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + tile_id_list = args.tile_id_list - run_date = args.run_date # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - download_tile_set(sensit_type=sensit_type, tile_id_list=tile_id_list) \ No newline at end of file + download_tile_set(tile_id_list) \ No newline at end of file diff --git a/analyses/mp_aggregate_results_to_4_km.py b/analyses/mp_aggregate_results_to_4_km.py deleted file mode 100644 index e8713e1b..00000000 --- a/analyses/mp_aggregate_results_to_4_km.py +++ /dev/null @@ -1,315 +0,0 @@ -''' -This script creates maps of model outputs at roughly 5km resolution (0.04x0.04 degrees), where each output pixel -represents the total value in the pixel (not the density) (hence, the aggregated results). -This is currently set up for annual removal rate, gross removals, gross emissions, and net flux. -It iterates through all the model outputs that are supplied. -The rewindowed pixel area tiles, tcd, Hansen gain, and mangrove biomass tiles must already be created and in s3 -(created using mp_rewindow_tiles.py). -First, this script rewindows the model output into 160x160 (0.04x0.04 degree) windows, instead of the native -40000x1 pixel windows. -Then it calculates the per pixel value for each model output pixel and sums those values within each 0.04x0.04 degree -aggregated pixel. -It converts emissions, removals, and net flux from totals over the model period to annual values. -For sensitivity analysis runs, it only processes outputs which actually have a sensitivity analysis version. -The user has to supply a tcd threshold for which forest pixels to include in the results. Defaults to cn.canopy_threshold. 
-For sensitivity analysis, the s3 folder with the aggregations for the standard model must be specified. -sample command: python mp_aggregate_results_to_4_km.py -tcd 30 -t no_shifting_ag -sagg s3://gfw2-data/climate/carbon_model/0_04deg_output_aggregation/biomass_soil/standard/20200901/net_flux_Mt_CO2e_biomass_soil_per_year_tcd30_0_4deg_modelv1_2_0_std_20200901.tif -''' - - -import multiprocessing -from subprocess import Popen, PIPE, STDOUT, check_call -from functools import partial -import datetime -import argparse -import os -import glob -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'analyses')) -import aggregate_results_to_4_km - - -def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux = None, run_date = None, no_upload = None): - - os.chdir(cn.docker_base_dir) - - # Files to download for this script - download_dict = { - cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], - cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], - cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], - cn.net_flux_dir: [cn.pattern_net_flux] - } - - # Checks whether the canopy cover argument is valid - if thresh < 0 or thresh > 99: - uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') - - - # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles - uu.s3_flexible_download(cn.pixel_area_rewindow_dir, cn.pattern_pixel_area_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent - uu.s3_flexible_download(cn.tcd_rewindow_dir, cn.pattern_tcd_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - uu.s3_flexible_download(cn.gain_rewindow_dir, cn.pattern_gain_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - uu.s3_flexible_download(cn.mangrove_biomass_2000_rewindow_dir, cn.pattern_mangrove_biomass_2000_rewindow, cn.docker_base_dir, sensit_type, tile_id_list) - - uu.print_log("Model outputs to process are:", download_dict) - - # List of output directories. Modified later for sensitivity analysis. - # Output pattern is determined later. - output_dir_list = [cn.output_aggreg_dir] - - # If the model run isn't the standard one, the output directory is changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - - # A date can optionally be provided by the full model script or a run of this script. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - - # Iterates through the types of tiles to be processed - for dir, download_pattern in list(download_dict.items()): - - download_pattern_name = download_pattern[0] - - # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, sensit_type, tile_id_list) - - - if tile_id_list == 'all': - # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(dir, sensit_type) - - # Gets an actual tile id to use as a dummy in creating the actual tile pattern - local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, download_pattern_name) - sample_tile_id = uu.get_tile_id(local_tile_list[0]) - - # Renames the tiles according to the sensitivity analysis before creating dummy tiles. - # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few - # lines later. - tile_id = sample_tile_id # a dummy tile id (but it has to be a real tile id). It is removed later. - output_pattern = uu.sensit_tile_rename(sensit_type, tile_id, download_pattern_name) - pattern = output_pattern[9:-4] - - # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis - if (sensit_type != 'std') & (sensit_type not in pattern): - uu.print_log("{} not a sensitivity analysis output. Skipping aggregation...".format(pattern) + "\n") - - continue - - # Lists the tiles of the particular type that is being iterated through. - # Excludes all intermediate files - tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern)) - # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring - tile_list = [i for i in tile_list if not ('hanson_2013' in i)] - tile_list = [i for i in tile_list if not ('rewindow' in i)] - tile_list = [i for i in tile_list if not ('0_04deg' in i)] - tile_list = [i for i in tile_list if not ('.ovr' in i)] - - # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif'] # test tiles - - uu.print_log("There are {0} tiles to process for pattern {1}".format(str(len(tile_list)), download_pattern_name) + "\n") - uu.print_log("Processing:", dir, "; ", pattern) - - # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 160x160 pixels, - # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed. - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 12 # 12 processors = XXX GB peak - else: - processes = 16 # 16 processors = XXX GB peak - else: - processes = 8 - uu.print_log('Rewindow max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name, no_upload=no_upload), tile_id_list) - # Added these in response to error12: Cannot allocate memory error. 
- # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool - # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # - # uu.rewindow(tile_id, download_pattern_name,no_upload) - - - # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel) - # and sums those values in each 160x160 pixel window. - # The sum for each 160x160 pixel window is stored in a 2D array, which is then converted back into a raster at - # 0.04x0.04 degree resolution (approximately 10m in the tropics). - # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha). - # The 0.04x0.04 degree tile is output. - # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 10 # 10 processors = XXX GB peak - else: - processes = 12 # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) - else: - processes = 8 - uu.print_log('Conversion to per pixel and aggregate max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type, - no_upload=no_upload), tile_list) - pool.close() - pool.join() - - # # For single processor use - # for tile in tile_list: - # - # aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload) - - # Makes a vrt of all the output 10x10 tiles (10 km resolution) - out_vrt = "{}_0_04deg.vrt".format(pattern) - os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_04deg*.tif'.format(out_vrt, pattern)) - - # Creates the output name for the 10km map - out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type) - uu.print_log(out_pattern) - - # Produces a single raster of all the 10x10 tiles (0.04 degree resolution) - cmd = ['gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=DEFLATE', - '-tr', '0.04', '0.04', - out_vrt, '{}.tif'.format(out_pattern)] - uu.log_subprocess_output_full(cmd) - - - # Adds metadata tags to output rasters - uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern), sensit_type) - - # Units are different for annual removal factor, so metadata has to reflect that - if 'annual_removal_factor' in out_pattern: - cmd = ['gdal_edit.py', - '-mo', 'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees', - '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - '-mo', 'extent=Global', - '-mo', 'scale=negative values are removals', - '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh), - '{0}.tif'.format(out_pattern)] - uu.log_subprocess_output_full(cmd) - - else: - cmd = ['gdal_edit.py', - '-mo', 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees', - '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', - '-mo', 'extent=Global', - '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh), - '{0}.tif'.format(out_pattern)] - uu.log_subprocess_output_full(cmd) - - - # If no_upload flag is not 
activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - uu.print_log("Tiles processed. Uploading to s3 now...") - uu.upload_final_set(output_dir_list[0], out_pattern) - - # Cleans up the folder before starting on the next raster type - vrtList = glob.glob('*vrt') - for vrt in vrtList: - os.remove(vrt) - - for tile_name in tile_list: - tile_id = uu.get_tile_id(tile_name) - os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern)) - os.remove('{0}_{1}_0_04deg.tif'.format(tile_id, pattern)) - - # Need to delete rewindowed tiles so they aren't confused with the normal tiles for creation of supplementary outputs - rewindow_list = glob.glob('*rewindow*tif') - for rewindow_tile in rewindow_list: - os.remove(rewindow_tile) - uu.print_log("Deleted all rewindowed tiles") - - - # Compares the net flux from the standard model and the sensitivity analysis in two ways. - # This does not work for compariing the raw outputs of the biomass_swap and US_removals sensitivity models because their - # extents are different from the standard model's extent (tropics and US tiles vs. global). - # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to - # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000. - # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the - # code below should work. - if sensit_type not in ['std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss']: - - if std_net_flux: - - uu.print_log("Standard aggregated flux results provided. Creating comparison maps.") - - # Copies the standard model aggregation outputs to s3. Only net flux is used, though. - uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type) - - # Identifies the standard model net flux map - std_aggreg_flux = os.path.split(std_net_flux)[1] - - try: - # Identifies the sensitivity model net flux map - sensit_aggreg_flux = glob.glob('net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0] - - uu.print_log("Standard model net flux:", std_aggreg_flux) - uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux) - - except: - uu.print_log('Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.') - - uu.print_log("Creating map of percent difference between standard and {} net flux".format(sensit_type)) - aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) - - uu.print_log("Creating map of which pixels change sign and which stay the same between standard and {}".format(sensit_type)) - aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff) - uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change) - - else: - - uu.print_log("No standard aggregated flux results provided. 
Not creating comparison maps.") - - -if __name__ == '__main__': - - # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run - parser = argparse.ArgumentParser( - description='Create maps of model outputs at aggregated/coarser resolution') - parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) - parser.add_argument('--tile_id_list', '-l', required=True, - help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--tcd-threshold', '-tcd', required=False, default=cn.canopy_threshold, - help='Tree cover density threshold above which pixels will be included in the aggregation. Default is 30.') - parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False, - help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map') - parser.add_argument('--no-upload', '-nu', action='store_true', - help='Disables uploading of outputs to s3') - args = parser.parse_args() - sensit_type = args.model_type - tile_id_list = args.tile_id_list - std_net_flux = args.std_net_flux_aggreg - thresh = args.tcd_threshold - thresh = int(thresh) - no_upload = args.no_upload - - # Disables upload to s3 if no AWS credentials are found in environment - if not uu.check_aws_creds(): - no_upload = True - - # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux, - no_upload=no_upload) - - # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) - tile_id_list = uu.tile_id_list_check(tile_id_list) - - mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh, - std_net_flux=std_net_flux, no_upload=no_upload) \ No newline at end of file diff --git a/analyses/mp_create_supplementary_outputs.py b/analyses/mp_create_supplementary_outputs.py deleted file mode 100644 index e08892d2..00000000 --- a/analyses/mp_create_supplementary_outputs.py +++ /dev/null @@ -1,215 +0,0 @@ -''' -Script to create three supplementary tiled outputs for each main model output (gross emissions, gross removals, net flux), -which are already in per hectare values for full model extent: -1. per pixel values for full model extent (all pixels included in model extent) -2. per hectare values for forest extent (within the model extent, pixels that have TCD>30 OR Hansen gain OR mangrove biomass) -3. per pixel values for forest extent -The forest extent outputs are for sharing with partners because they limit the model to just the relevant pixels -(those within forests). -Forest extent is defined in the methods section of Harris et al. 2021 Nature Climate Change. -It is roughly implemented in mp_model_extent.py but using TCD>0 rather thant TCD>30. Here, the TCD>30 requirement -is implemented instead as a subset of the full model extent pixels. -Forest extent is: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations. -The WHRC AGB2000 and pre-2000 plantations conditions were set in mp_model_extent.py, so they don't show up here. 
-''' - - -import multiprocessing -from subprocess import Popen, PIPE, STDOUT, check_call -from functools import partial -import datetime -import argparse -import os -import glob -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'analyses')) -import create_supplementary_outputs - -def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date = None, no_upload = None): - - os.chdir(cn.docker_base_dir) - - tile_id_list_outer = tile_id_list - - # If a full model run is specified, the correct set of tiles for the particular script is listed - if tile_id_list_outer == 'all': - # List of tiles to run in the model - tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type) - - uu.print_log(tile_id_list_outer) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_outer))) + "\n") - - - # Files to download for this script - download_dict = { - cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], - cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], - cn.net_flux_dir: [cn.pattern_net_flux] - } - - # List of output directories and output file name patterns. - # Outputs must be in the same order as the download dictionary above, and then follow the same order for all outputs. - # Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent. - output_dir_list = [ - cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir, - cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir, - cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, - cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir, - cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, - cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir, - cn.net_flux_per_pixel_full_extent_dir, - cn.net_flux_forest_extent_dir, - cn.net_flux_per_pixel_forest_extent_dir] - output_pattern_list = [ - cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent, - cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent, - cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent, - cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent, - cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent, - cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent, - cn.pattern_net_flux_per_pixel_full_extent, - cn.pattern_net_flux_forest_extent, - cn.pattern_net_flux_per_pixel_forest_extent - ] - - # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - # Pixel area tiles-- necessary for calculating per pixel values - uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list_outer) - # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent - uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, tile_id_list_outer) - uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list_outer) - uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, sensit_type, tile_id_list_outer) - - uu.print_log("Model outputs to process are:", download_dict) - - # If the model run isn't the standard one, the output directory is 
changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - - # A date can optionally be provided by the full model script or a run of this script. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - - # Iterates through input tile sets - for key, values in download_dict.items(): - - # Sets the directory and pattern for the input being processed - input_dir = key - input_pattern = values[0] - - # If a full model run is specified, the correct set of tiles for the particular script is listed. - # A new list is named so that tile_id_list stays as the command line argument. - if tile_id_list == 'all': - # List of tiles to run in the model - tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type) - else: - tile_id_list_input = tile_id_list_outer - - uu.print_log(tile_id_list_input) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_input))) + "\n") - - # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - uu.print_log("Downloading tiles from", input_dir) - uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, sensit_type, tile_id_list_input) - - # Blank list of output patterns, populated below - output_patterns = [] - - # Matches the output patterns with the input pattern. - # This requires that the output patterns be grouped by input pattern and be in the order described in - # the comment above. - if "gross_removals" in input_pattern: - output_patterns = output_pattern_list[0:3] - elif "gross_emis" in input_pattern: - output_patterns = output_pattern_list[3:6] - elif "net_flux" in input_pattern: - output_patterns = output_pattern_list[6:9] - else: - uu.exception_log(no_upload, "No output patterns found for input pattern. Please check.") - - uu.print_log("Input pattern:", input_pattern) - uu.print_log("Output patterns:", output_patterns) - - # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak - # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak - if cn.count == 96: - processes = 18 - else: - processes = 2 - uu.print_log("Creating derivative outputs for {0} with {1} processors...".format(input_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern, - output_patterns=output_patterns, sensit_type=sensit_type, no_upload=no_upload), tile_id_list_input) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list_input: - # create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload) - - # Checks the two forest extent output tiles created from each input tile for whether there is data in them. - # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent - # version may not have pixels in the forest extent version. 
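The emptiness check described in the comment above (and carried forward as STEP 6 of the new mp_derivative_outputs.py) lives in universal_util, which is outside this diff. A minimal sketch of what that kind of check can look like, assuming rasterio and numpy are available and the tile is a local GeoTIFF (hypothetical helper, not the repository's actual implementation):

import os
import numpy as np
import rasterio

def delete_if_empty(tile_path):
    """Removes a GeoTIFF if no window contains non-zero data. Illustrative only."""
    with rasterio.open(tile_path) as src:
        for _, window in src.block_windows(1):
            if np.any(src.read(1, window=window) != 0):
                return False          # data found; keep the tile
    os.remove(tile_path)              # every window was empty, so the tile is dropped
    return True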
- for output_pattern in output_patterns[1:3]: - if cn.count <= 2: # For local tests - processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list_input) - pool.close() - pool.join() - else: - processes = 55 # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list_input) - pool.close() - pool.join() - - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) - - -if __name__ == '__main__': - - # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run - parser = argparse.ArgumentParser( - description='Create tiles of model outputs at forest extent and per-pixel values') - parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) - parser.add_argument('--tile_id_list', '-l', required=True, - help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--run-date', '-d', required=False, - help='Date of run. Must be format YYYYMMDD.') - parser.add_argument('--no-upload', '-nu', action='store_true', - help='Disables uploading of outputs to s3') - args = parser.parse_args() - sensit_type = args.model_type - tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload - - # Disables upload to s3 if no AWS credentials are found in environment - if not uu.check_aws_creds(): - no_upload = True - - # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) - - # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) - tile_id_list = uu.tile_id_list_check(tile_id_list) - - mp_create_supplementary_outputs(sensit_type=sensit_type, tile_id_list=tile_id_list, - run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/analyses/mp_derivative_outputs.py b/analyses/mp_derivative_outputs.py new file mode 100644 index 00000000..64fcd654 --- /dev/null +++ b/analyses/mp_derivative_outputs.py @@ -0,0 +1,362 @@ +""" +Final step of the flux model. This creates various derivative outputs which are used on the GFW platform and for +supplemental analyses. Derivative outputs for gross emissions, gross removals, and net flux at 0.00025x0.000025 deg +resolution for full model extent (all pixels included in mp_model_extent.py): +1. Full extent flux Mg per pixel at 0.00025x0.00025 deg (all pixels included in mp_model_extent.py) +2. Forest extent flux Mg per hectare at 0.00025x0.00025 deg (forest extent defined below) +3. Forest extent flux Mg per pixel at 0.00025x0.00025 deg (forest extent defined below) +4. Forest extent flux Mt at 0.04x0.04 deg (aggregated output, ~ 4x4 km at equator) +For sensitivity analyses only: +5. Percent difference between standard model and sensitivity analysis for aggregated map +6. 
Pixels with sign changes between standard model and sensitivity analysis for aggregated map + +The forest extent outputs are for sharing with partners because they limit the model to just the relevant pixels +(those within forests, as defined below). +Forest extent is defined in the methods section of Harris et al. 2021 Nature Climate Change: +within the model extent, pixels that have TCD>30 OR Hansen gain OR mangrove biomass. +More formally, forest extent is: +((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations. +The WHRC AGB2000 condition was set in mp_model_extent.py, so it doesn't show up here. + +python -m analyses.mp_derivative_outputs -t std -l 00N_000E -nu +python -m analyses.mp_derivative_outputs -t std -l all +""" + +import multiprocessing +from functools import partial +import datetime +import argparse +import os +import glob +import sys + +import constants_and_names as cn +import universal_util as uu + +from . import derivative_outputs + +def mp_derivative_outputs(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: derivative outputs at native and aggregated resolution for emissions, removals, and net flux + """ + + os.chdir(cn.docker_tile_dir) + + # Keeps tile_id_list as its own variable for referencing in the tile set for loop + tile_id_list_outer = tile_id_list + + # If a full model run is specified, the correct set of tiles for the particular script is listed + if tile_id_list_outer == 'all': + # List of tiles to run in the model + tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, cn.SENSIT_TYPE) + + uu.print_log(tile_id_list_outer) + uu.print_log(f'There are {str(len(tile_id_list_outer))} tiles to process', "\n") + + # Tile sets to be processed for this script. The three main outputs from the model. + download_dict = { + cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], + cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], + cn.net_flux_dir: [cn.pattern_net_flux] + } + + uu.print_log(f'Model outputs to process are: {download_dict}') + + # List of output directories and output file name patterns. + # Outputs must be in the same order as the download dictionary above, and then follow the following order for all outputs: + # per pixel full extent, per hectare forest extent, per pixel forest extent. + # Aggregated output comes at the end. 
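As a reading aid for the forest extent rule quoted above, ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations translates to a per-window boolean mask roughly like the sketch below. Array names and values are invented for illustration; the actual masking presumably happens in derivative_outputs.forest_extent_per_pixel_outputs (not shown in this diff), and the WHRC AGB2000 condition is already applied upstream in mp_model_extent.py.

import numpy as np

# Invented 2x2 window; one layer per input named in the forest extent definition
tcd_2000       = np.array([[35, 10], [80,  0]])   # tree cover density 2000 (%)
whrc_agb_2000  = np.array([[120, 0], [60,  0]])   # WHRC aboveground biomass 2000 (Mg/ha)
hansen_gain    = np.array([[0, 1], [0, 0]])       # Hansen gain flag (1 = gain)
mangrove_agb   = np.array([[0, 0], [0, 90]])      # mangrove biomass 2000 (Mg/ha)
pre_2000_plant = np.array([[0, 0], [1, 0]])       # pre-2000 plantation flag (1 = plantation)

forest_extent = (
    (((tcd_2000 > 30) & (whrc_agb_2000 > 0)) | (hansen_gain == 1) | (mangrove_agb > 0))
    & (pre_2000_plant == 0)
)
print(forest_extent)   # [[ True  True] [False  True]]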
+ output_dir_list = [ + cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir, + cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir, + cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, + cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir, + cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, + cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir, + cn.net_flux_per_pixel_full_extent_dir, + cn.net_flux_forest_extent_dir, + cn.net_flux_per_pixel_forest_extent_dir, + cn.output_aggreg_dir] + output_pattern_list = [ + cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent, + cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent, + cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent, + cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent, + cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent, + cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent, + cn.pattern_net_flux_per_pixel_full_extent, + cn.pattern_net_flux_forest_extent, + cn.pattern_net_flux_per_pixel_forest_extent, + f'tcd{cn.canopy_threshold}_{cn.pattern_aggreg}'] + + # If the model run isn't the standard one, the output directory is changed + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # Pixel area tiles-- necessary for calculating per pixel values + uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_outer) + # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent + uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_outer) + uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain_data_lake, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_outer) + uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_outer) + uu.s3_flexible_download(cn.plant_pre_2000_processed_dir, cn.pattern_plant_pre_2000, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_outer) + + # Iterates through the types of tiles to be processed + for input_dir, download_pattern_name in download_dict.items(): + + # Pattern for tile set being processed + input_pattern = download_pattern_name[0] + + # If a full model run is specified, the correct set of tiles for the particular script is listed. + # A new list is named so that tile_id_list stays as the command line argument. 
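The pixel area tiles downloaded above are what turn the per-hectare rasters into per-pixel rasters, and the rewindow/aggregation steps later in this function sum those per-pixel values into 0.04x0.04 degree cells. A simplified numpy sketch of both operations, assuming pixel areas in m2 and window dimensions that are multiples of 160 (the real logic is in derivative_outputs, which is not part of this hunk):

import numpy as np

def per_pixel_then_aggregate(per_ha, pixel_area_m2, block=160):
    """Converts a per-hectare window to per-pixel values, then sums each
    block x block group (160 x 160 pixels of 0.00025 deg = one 0.04 deg cell).
    Illustration only."""
    per_pixel = per_ha * pixel_area_m2 / 10_000     # m2 / 10,000 = ha per pixel; value/ha * ha/pixel = value/pixel
    rows, cols = per_pixel.shape
    aggregated = (per_pixel
                  .reshape(rows // block, block, cols // block, block)
                  .sum(axis=(1, 3)))                # one sum per 0.04 deg cell
    return per_pixel, aggregated

# A 320x320 window of 1 Mg CO2e/ha with ~770 m2 pixels becomes a 2x2 aggregated array
per_ha = np.ones((320, 320), dtype='float32')
area = np.full((320, 320), 770.0, dtype='float32')
print(per_pixel_then_aggregate(per_ha, area)[1].shape)   # (2, 2)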
+ if tile_id_list == 'all': + # List of tiles to run in the model + tile_id_list_inner = uu.tile_list_s3(input_dir, cn.SENSIT_TYPE) + else: + tile_id_list_inner = tile_id_list_outer + + uu.print_log(tile_id_list_inner) + uu.print_log(f'There are {str(len(tile_id_list_inner))} tiles to process for pattern {input_pattern}', "\n") + uu.print_log(f'Processing: {input_dir}; {input_pattern}') + + # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list + uu.print_log(f'Downloading tiles from {input_dir}') + uu.s3_flexible_download(input_dir, input_pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list_inner) + + # Blank list of output patterns, populated below + output_patterns = [] + + # Matches the output patterns with the input pattern. + # This requires that the output patterns be grouped by input pattern and be in the order described in + # the comment above. + if "gross_removals" in input_pattern: + output_patterns = output_pattern_list[0:3] + elif "gross_emis" in input_pattern: + output_patterns = output_pattern_list[3:6] + elif "net_flux" in input_pattern: + output_patterns = output_pattern_list[6:9] + else: + uu.exception_log('No output patterns found for input pattern. Please check.') + + + ### STEP 1: Creates the full extent per-pixel, forest extent per hectare + ### and forest extent per pixel 0.00025x0.00025 deg derivative outputs + uu.print_log("STEP 1: Creating derivative per-pixel and forest extent outputs") + uu.print_log(f'Input pattern: {input_pattern}') + uu.print_log(f'Output patterns: {output_patterns}') + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list_inner: + derivative_outputs.forest_extent_per_pixel_outputs(tile_id, input_pattern, output_patterns) + else: + # 18 = >740 GB peak; 15=XXX GB peak + if cn.count == 96: + processes = 15 + else: + processes = 2 + uu.print_log(f'Creating derivative outputs for {input_pattern} with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(derivative_outputs.forest_extent_per_pixel_outputs, input_pattern=input_pattern, + output_patterns=output_patterns), + tile_id_list_inner) + pool.close() + pool.join() + + + ### STEP 2: Converts the forest extent 10x10 degree Hansen tiles that + ### are in windows of 40000x1 pixels to windows of 160x160 pixels. + ### This will allow the 0.00025x0.00025 deg pixels in each window to be summed into the aggregated pixels + ### in the next step. + uu.print_log("STEP 2: Rewindow tiles") + + # The forest extent per-pixel pattern for that model output. This derivative output is used for aggregation + # because aggregation is just for forest extent and sums the per-pixel values within each aggregated pixel. + download_pattern_name = output_patterns[2] + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list_inner: + uu.rewindow(tile_id, download_pattern_name) + else: + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 12 # 12 processors = XXX GB peak + else: + processes = 14 # 14 processors = XXX GB peak + else: + processes = 8 + uu.print_log(f'Rewindow max processors= {processes}') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name), + tile_id_list_inner) + pool.close() + pool.join() + + + ### STEP 3: Aggregates the rewindowed per-pixel values in each 160x160 window. + ### The sum for each 160x160 pixel window is stored in a 2D array, which is then converted back into a raster at + ### 0.04x0.04 degree resolution . 
+ ### Each aggregated pixel in this raster is the sum of the forest extent 0.00025x0.00025 deg per-pixel maps. + ### 10x10 deg tiles at 0.04x0.04 deg resolution are output. + uu.print_log("STEP 3: Aggregate pixels within tiles") + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list_inner: + derivative_outputs.aggregate_within_tile(tile_id, download_pattern_name) + else: + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 10 # 10 processors = XXX GB peak + else: + processes = 11 # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) + else: + processes = 8 + uu.print_log(f'Aggregate max processors={processes}') + pool = multiprocessing.Pool(processes) + pool.map(partial(derivative_outputs.aggregate_within_tile, download_pattern_name=download_pattern_name), + tile_id_list_inner) + pool.close() + pool.join() + + + ### STEP 4: Combines 10x10 deg aggregated tiles into a global aggregated map + uu.print_log("STEP 4: Combine tiles into global raster") + derivative_outputs.aggregate_tiles(input_pattern, download_pattern_name) + + + ### STEP 5: Clean up folder + uu.print_log("STEP 5: Clean up folder") + vrt_list = glob.glob('*vrt') + for vrt in vrt_list: + os.remove(vrt) + + rewindow_list = glob.glob(f'*rewindow.tif') + for rewindow in rewindow_list: + os.remove(rewindow) + + aggreg_list = glob.glob(f'*_0_04deg.tif') + for aggreg in aggreg_list: + os.remove(aggreg) + + + ### STEP 6: Checks the two forest extent output tiles created from each input tile for whether there is data in them. + ### Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent + ### version may not have pixels in the forest extent version. + uu.print_log("STEP 6: Checking forest extent outputs for data") + for output_pattern in output_patterns[1:3]: + if cn.SINGLE_PROCESSOR or cn.count < 4: + for tile_id in tile_id_list_inner: + uu.check_and_delete_if_empty_light(tile_id, output_pattern) + else: + processes = 55 # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list_inner) + pool.close() + pool.join() + + + ### OPTIONAL STEP 7: Upload 0.00025x0.00025 deg and aggregated outputs to s3 + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: + uu.print_log("STEP 7: Uploading outputs to s3") + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) + + + # ### OPTIONAL STEP 8: Compares sensitivity analysis aggregated net flux map to standard model aggregated net flux map in two ways. + # ### This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their + # ### extents are different from the standard model's extent (tropics and US tiles vs. global). + # ### Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to + # ### the outline of the US and clip the standard model net flux to the extent of JPL AGB2000. + # ### Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the + # ### code below should work. 
+ # ### WARNING: THIS HAS NOT BEEN TESTED SINCE MODEL V1.2.0 AND IS NOT LIKELY TO WORK WITHOUT SIGNIFICANT REVISIONS + # ### AND REFACTORING. THUS, IT IS COMMENTED OUT. + # if cn.SENSIT_TYPE not in ['std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss']: + # + # if std_net_flux: + # + # uu.print_log('Standard aggregated flux results provided. Creating comparison maps.') + # + # # Copies the standard model aggregation outputs to s3. Only net flux is used, though. + # uu.s3_file_download(std_net_flux, cn.docker_base_dir, cn.SENSIT_TYPE) + # + # # Identifies the standard model net flux map + # std_aggreg_flux = os.path.split(std_net_flux)[1] + # + # try: + # # Identifies the sensitivity model net flux map + # sensit_aggreg_flux = glob.glob('net_flux_Mt_CO2e_*{}*'.format(cn.SENSIT_TYPE))[0] + # + # uu.print_log(f'Standard model net flux: {std_aggreg_flux}') + # uu.print_log(f'Sensitivity model net flux: {sensit_aggreg_flux}') + # + # except: + # uu.print_log( + # 'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.') + # + # uu.print_log(f'Creating map of percent difference between standard and {cn.SENSIT_TYPE} net flux') + # aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux) + # + # uu.print_log( + # f'Creating map of which pixels change sign and which stay the same between standard and {cn.SENSIT_TYPE}') + # aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux) + # + # # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + # if not cn.NO_UPLOAD: + # uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff) + # uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change) + # + # else: + # + # uu.print_log('No standard aggregated flux results provided. Not creating comparison maps.') + + +if __name__ == '__main__': + + # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run + parser = argparse.ArgumentParser( + description='Create supplementary outputs: aggregated maps, per-pixel at original resolution, forest-only at original resolution') + parser.add_argument('--model-type', '-t', required=True, + help=f'{cn.model_type_arg_help}') + parser.add_argument('--tile_id_list', '-l', required=True, + help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') + parser.add_argument('--run-date', '-d', required=False, + help='Date of run. 
Must be format YYYYMMDD.') + parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False, + help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') + args = parser.parse_args() + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.STD_NET_FLUX = args.std_net_flux_aggreg + cn.SINGLE_PROCESSOR = args.single_processor + + tile_id_list = args.tile_id_list + + # Disables upload to s3 if no AWS credentials are found in environment + if not uu.check_aws_creds(): + cn.NO_UPLOAD = True + + # Create the output log + uu.initiate_log(tile_id_list) + + # Checks whether the sensitivity analysis and tile_id_list arguments are valid + uu.check_sensit_type(cn.SENSIT_TYPE) + tile_id_list = uu.tile_id_list_check(tile_id_list) + + mp_derivative_outputs(tile_id_list) \ No newline at end of file diff --git a/analyses/mp_net_flux.py b/analyses/mp_net_flux.py index 9501a7d5..f7fa4dc0 100644 --- a/analyses/mp_net_flux.py +++ b/analyses/mp_net_flux.py @@ -1,31 +1,39 @@ -### Calculates the net emissions over the study period, with units of Mg CO2e/ha on a pixel-by-pixel basis. -### This only uses gross emissions from biomass+soil (doesn't run with gross emissions from soil_only). +""" +Calculates the net GHG flux over the study period, with units of Mg CO2e/ha on a pixel-by-pixel basis. +This only uses gross emissions from biomass+soil (doesn't run with gross emissions from soil_only). + +python -m analyses.mp_net_flux -t std -l 00N_000E -nu +python -m analyses.mp_net_flux -t std -l all +""" -import multiprocessing import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'analyses')) -import net_flux +from . import net_flux -def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_net_flux(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 1 set of tiles with net GHG flux (gross emissions minus gross removals). 
+ Units: Mg CO2e/ha over the model period + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.create_combined_tile_list(cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, - cn.cumul_gain_AGCO2_BGCO2_all_types_dir, - sensit_type=sensit_type) + tile_id_list = uu.create_combined_tile_list( + [cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir], + sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script @@ -42,46 +50,47 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # Creates a single filename pattern to pass to the multiprocessor call - pattern = output_pattern_list[0] - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 32 # 32 processors = XXX GB peak - else: - processes = 40 # 38 = 690 GB peak; 40 = 715 GB peak - else: - processes = 9 - uu.print_log('Net flux max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + net_flux.net_calc(tile_id, output_pattern_list[0]) - # # For single processor use - # for tile_id in tile_id_list: - # net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type, no_upload) + else: + pattern = output_pattern_list[0] + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 32 # 32 processors = XXX GB peak + else: + processes = 40 # 38 = 690 GB peak; 40 = 715 GB peak + else: + processes = 9 + uu.print_log(f'Net flux max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(net_flux.net_calc, pattern=pattern), + tile_id_list) + pool.close() + pool.join() - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -91,28 +100,34 @@ def mp_net_flux(sensit_type, tile_id_list, run_date = None, no_upload = None): parser = argparse.ArgumentParser( description='Creates tiles of net GHG flux over model period') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_net_flux(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_net_flux(tile_id_list) diff --git a/analyses/mp_tile_statistics.py b/analyses/mp_tile_statistics.py index 82ac336e..3221c836 100644 --- a/analyses/mp_tile_statistics.py +++ b/analyses/mp_tile_statistics.py @@ -16,7 +16,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # The column names for the tile summary statistics. # If the statistics calculations are changed in tile_statistics.py, the list here needs to be changed, too. @@ -34,7 +34,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): uu.print_log(tile_id_list) # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles - uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, 'std', tile_id_list) + uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_tile_dir, 'std', tile_id_list) # For downloading all tiles in selected folders download_dict = { @@ -150,7 +150,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # List of all the tiles on the spot machine to be summarized (excludes pixel area tiles and tiles created by gdal_calc # (in case this script was already run on this spot machine and created output from gdal_calc) @@ -197,7 +197,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): parser = argparse.ArgumentParser( description='Create tiles of the annual AGB and BGB removals rates for mangrove forests') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. 
Should be of form 00N_110E or 00N_110E,00N_120E or all.') args = parser.parse_args() @@ -205,7 +205,7 @@ def mp_tile_statistics(sensit_type, tile_id_list): tile_id_list = args.tile_id_list # Create the output log - uu.initiate_log(sensit_type=sensit_type, tile_id_list=tile_id_list) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) diff --git a/analyses/net_flux.py b/analyses/net_flux.py index 409c1f74..a0a26a8b 100644 --- a/analyses/net_flux.py +++ b/analyses/net_flux.py @@ -1,15 +1,26 @@ -### Calculates the net emissions over the study period, with units of Mg CO2/ha on a pixel-by-pixel basis +""" +Function to create net flux tiles +""" -import os import datetime import numpy as np import rasterio import sys +from memory_profiler import profile + sys.path.append('../') import constants_and_names as cn import universal_util as uu -def net_calc(tile_id, pattern, sensit_type, no_upload): +# @profile +def net_calc(tile_id, pattern): + """ + Creates net GHG flux tile set + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: 1 tile with net GHG flux (gross emissions minus gross removals). + Units: Mg CO2e/ha over the model period + """ uu.print_log("Calculating net flux for", tile_id) @@ -17,11 +28,11 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): start = datetime.datetime.now() # Names of the removals and emissions tiles - removals_in = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types) - emissions_in = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil) + removals_in = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types) + emissions_in = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil) # Output net emissions file - net_flux = '{0}_{1}.tif'.format(tile_id, pattern) + net_flux = uu.make_tile_name(tile_id, pattern) try: removals_src = rasterio.open(removals_in) @@ -29,9 +40,9 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): kwargs = removals_src.meta # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory windows = removals_src.block_windows(1) - uu.print_log(" Gross removals tile found for {}".format(removals_in)) - except: - uu.print_log(" No gross removals tile found for {}".format(removals_in)) + uu.print_log(f' Gross removals tile found for {removals_in}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gross removals tile not found for {removals_in}') try: emissions_src = rasterio.open(emissions_in) @@ -39,9 +50,9 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): kwargs = emissions_src.meta # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory windows = emissions_src.block_windows(1) - uu.print_log(" Gross emissions tile found for {}".format(emissions_in)) - except: - uu.print_log(" No gross emissions tile found for {}".format(emissions_in)) + uu.print_log(f' Gross emissions tile found for {emissions_in}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gross emissions tile not found for {emissions_in}') # Skips the tile if there is neither a gross emissions nor a gross removals tile. 
# This should only occur for biomass_swap sensitivity analysis, which gets its net flux tile list from @@ -55,17 +66,17 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): nodata=0, dtype='float32' ) - except: - uu.print_log("No gross emissions or gross removals for {}. Skipping tile.".format(tile_id)) + except rasterio.errors.RasterioIOError: + uu.print_log(f'Gross emissions or gross removals not found for {tile_id}. Skipping tile.') return # Opens the output tile, giving it the arguments of the input tiles net_flux_dst = rasterio.open(net_flux, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(net_flux_dst, sensit_type) + uu.add_universal_metadata_rasterio(net_flux_dst) net_flux_dst.update_tags( - units='Mg CO2e/ha over model duration (2001-20{})'.format(cn.loss_years)) + units=f'Mg CO2e/ha over model duration (2001-20{cn.loss_years})') net_flux_dst.update_tags( source='Gross emissions - gross removals') net_flux_dst.update_tags( @@ -81,11 +92,11 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): # Creates windows for each input tile try: removals_window = removals_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: removals_window = np.zeros((window.height, window.width)).astype('float32') try: emissions_window = emissions_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: emissions_window = np.zeros((window.height, window.width)).astype('float32') # Subtracts removals from emissions to calculate net flux (negative is net sink, positive is net source) @@ -94,4 +105,4 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): net_flux_dst.write_band(1, dst_data, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/burn_date/clip_year_tiles.py b/burn_date/clip_year_tiles.py deleted file mode 100644 index 651561af..00000000 --- a/burn_date/clip_year_tiles.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import datetime -from subprocess import Popen, PIPE, STDOUT, check_call -import sys -import utilities -sys.path.append('../') -import universal_util as uu -import constants_and_names as cn - -currentdir = os.path.dirname(os.path.abspath(__file__)) -parentdir = os.path.dirname(currentdir) -sys.path.insert(0, parentdir) - -def clip_year_tiles(tile_year_list, no_upload): - - # Start time - start = datetime.datetime.now() - - tile_id = tile_year_list[0].strip('.tif') - year = tile_year_list[1] - - vrt_name = "global_vrt_{}_wgs84.vrt".format(year) - - # Gets coordinates of hansen tile - uu.print_log("Getting coordinates of", tile_id) - xmin, ymin, xmax, ymax = uu.coords(tile_id) - - # Clips vrt to tile extent - uu.print_log("Clipping burn year vrt to {0} for {1}".format(tile_id, year)) - - clipped_raster = "ba_clipped_{0}_{1}.tif".format(year, tile_id) - cmd = ['gdal_translate', '-ot', 'Byte', '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0'] - cmd += [vrt_name, clipped_raster, '-tr', '.00025', '.00025'] - cmd += ['-projwin', str(xmin), str(ymax), str(xmax), str(ymin)] - uu.log_subprocess_output_full(cmd) - - # Calculates year tile values to be equal to year. 
ex: 17*1 - calc = '--calc={}*(A>0)'.format(int(year)-2000) - recoded_output = "ba_{0}_{1}.tif".format(year, tile_id) - outfile = '--outfile={}'.format(recoded_output) - - cmd = ['gdal_calc.py', '-A', clipped_raster, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--quiet'] - uu.log_subprocess_output_full(cmd) - - # Only copies to s3 if the tile has data. - # No tiles for 2000 have data because the burn year is coded as 0, which is NoData. - uu.print_log("Checking if {} contains any data...".format(tile_id)) - empty = uu.check_for_data(recoded_output) - - if empty: - uu.print_log(" No data found. Not copying {}.".format(tile_id)) - - else: - uu.print_log(" Data found in {}. Copying tile to s3...".format(tile_id)) - cmd = ['aws', 's3', 'cp', recoded_output, cn.burn_year_warped_to_Hansen_dir] - uu.log_subprocess_output_full(cmd) - uu.print_log(" Tile copied to", cn.burn_year_warped_to_Hansen_dir) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, "ba_{}".format(year), no_upload) - - diff --git a/burn_date/hansen_burnyear_final.py b/burn_date/hansen_burnyear_final.py deleted file mode 100644 index 77383987..00000000 --- a/burn_date/hansen_burnyear_final.py +++ /dev/null @@ -1,164 +0,0 @@ -import os -import datetime -import rasterio -import utilities -import glob -from shutil import copyfile -import numpy as np -from subprocess import Popen, PIPE, STDOUT, check_call -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - - -def hansen_burnyear(tile_id, no_upload): - - # Start time - start = datetime.datetime.now() - - uu.print_log("Processing", tile_id) - - # The tiles that are used. out_tile_no_tag is the output before metadata tags are added. out_tile is the output - # once metadata tags have been added. - out_tile_no_tag = '{0}_{1}_no_tag.tif'.format(tile_id, cn.pattern_burn_year) - out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_burn_year) - loss = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) - - # Does not continue processing tile if no loss (because there will not be any output) - if not os.path.exists(loss): - uu.print_log("No loss tile for", tile_id) - return - else: - uu.print_log("Loss tile exists for", tile_id) - - - # Downloads the burned area tiles for each year - include = 'ba_*_{}.tif'.format(tile_id) - burn_tiles_dir = 'burn_tiles' - if not os.path.exists(burn_tiles_dir): - os.mkdir(burn_tiles_dir) - cmd = ['aws', 's3', 'cp', cn.burn_year_warped_to_Hansen_dir, burn_tiles_dir, '--recursive', '--exclude', "*", '--include', include] - uu.log_subprocess_output_full(cmd) - - # For each year tile, converts to array and stacks them - array_list = [] - ba_tifs = glob.glob(burn_tiles_dir + '/*{}*'.format(tile_id)) - - # Skips the tile if it has no burned area data for any year - uu.print_log("There are {0} tiles to stack for {1}".format(len(ba_tifs), tile_id)) - if len(ba_tifs) == 0: - uu.print_log("Skipping {} because there are no tiles to stack".format(tile_id)) - return - - - # NOTE: All of this could pretty easily be done in rasterio. However, Sam's use of GDAL for this still works fine, - # so I've left it using GDAL. 
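The block below assigns each pixel the burned-area year that coincides with the tree cover loss year or precedes it by one year. Because the stacked comparison is easy to misread, here is a small, self-contained numpy illustration of the same selection, with invented values (axis 0 is the stack of annual burned-area layers; values are years since 2000, and 0 means not burned):

import numpy as np

stacked_years = np.array([
    [[17,  0]],   # burns detected in 2017
    [[ 0, 18]],   # burns detected in 2018
    [[19,  0]],   # burns detected in 2019
])
loss_year = np.array([[18, 18]])   # tree cover loss in 2018 for both pixels

# Keep burns in the loss year or the year before, then take the latest qualifying year
in_window = (stacked_years >= loss_year - 1) & (stacked_years <= loss_year)
burn_year = (in_window * stacked_years).max(axis=0)
print(burn_year)   # [[17 18]]: pixel 1 burned the year before loss, pixel 2 in the loss year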
- - for ba_tif in ba_tifs: - uu.print_log("Creating array with {}".format(ba_tif)) - array = utilities.raster_to_array(ba_tif) - array_list.append(array) - - # Stacks arrays from each year - uu.print_log("Stacking arrays for", tile_id) - stacked_year_array = utilities.stack_arrays(array_list) - - # Converts Hansen tile to array - uu.print_log("Creating loss year array for", tile_id) - loss_array = utilities.raster_to_array(loss) - - # Determines what year to assign burned area - lossarray_min1 = np.subtract(loss_array, 1) - - stack_con =(stacked_year_array >= lossarray_min1) & (stacked_year_array <= loss_array) - stack_con2 = stack_con * stacked_year_array - lossyear_burn_array = stack_con2.max(0) - - utilities.array_to_raster_simple(lossyear_burn_array, out_tile_no_tag, loss) - - # Only copies to s3 if the tile has data - uu.print_log("Checking if {} contains any data...".format(tile_id)) - empty = uu.check_for_data(out_tile_no_tag) - - # Checks output for data. There could be burned area but none of it coincides with tree cover loss, - # so this is the final check for whether there is any data. - if empty: - uu.print_log(" No data found. Not copying {}.".format(tile_id)) - - # Without this, the untagged version is counted and eventually copied to s3 if it has data in it - os.remove(out_tile_no_tag) - - return - - else: - uu.print_log(" Data found in {}. Adding metadata tags...".format(tile_id)) - - ### Thomas suggested these on 8/19/2020 but they didn't work. The first one wrote the tags but erased all the - ### data in the tiles (everything became 0 according to gdalinfo). The second one had some other error. - # with rasterio.open(out_tile_no_tag, 'r') as src: - # - # profile = src.profile - # - # with rasterio.open(out_tile_no_tag, 'w', **profile) as dst: - # - # dst.update_tags(units='year (2001, 2002, 2003...)', - # source='MODIS collection 6 burned area', - # extent='global') - # - # with rasterio.open(out_tile_no_tag, 'w+') as src: - # - # dst.update_tags(units='year (2001, 2002, 2003...)', - # source='MODIS collection 6 burned area', - # extent='global') - - - # All of the below is to add metadata tags to the output burn year masks. - # For some reason, just doing what's at https://rasterio.readthedocs.io/en/latest/topics/tags.html - # results in the data getting removed. - # I found it necessary to copy the desired output and read its windows into a new copy of the file, to which the - # metadata tags are added. I'm sure there's an easier way to do this but I couldn't figure out how. - # I know it's very convoluted but I really couldn't figure out how to add the tags without erasing the data. 
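On the tagging workaround described in the comment above: a simpler route that may avoid the copy-and-rewrite entirely is opening the raster in update ('r+') mode and writing the tags in place, which does not truncate the pixel data the way 'w' mode does. This has not been tested against these tiles, so treat it as a sketch rather than a drop-in replacement:

import rasterio

def tag_in_place(tile_path):
    """Adds metadata tags to an existing GeoTIFF without rewriting its pixel data.
    Sketch only; behavior can vary with rasterio/GDAL versions."""
    with rasterio.open(tile_path, 'r+') as dst:
        dst.update_tags(units='year (2001, 2002, 2003...)',
                        source='MODIS collection 6 burned area',
                        extent='global')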
- - copyfile(out_tile_no_tag, out_tile) - - with rasterio.open(out_tile_no_tag) as out_tile_no_tag_src: - - # Grabs metadata about the tif, like its location/projection/cellsize - kwargs = out_tile_no_tag_src.meta #### Use profile instead - - # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory - windows = out_tile_no_tag_src.block_windows(1) - - # Updates kwargs for the output dataset - kwargs.update( - driver='GTiff', - count=1, - compress='DEFLATE', - nodata=0 - ) - - out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs) - - # Adds metadata tags to the output raster - uu.add_rasterio_tags(out_tile_tagged, 'std') - out_tile_tagged.update_tags( - units='year (2001, 2002, 2003...)') - out_tile_tagged.update_tags( - source='MODIS collection 6 burned area, https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf') - out_tile_tagged.update_tags( - extent='global') - - # Iterates across the windows (1 pixel strips) of the input tile - for idx, window in windows: - in_window = out_tile_no_tag_src.read(1, window=window) - - # Writes the output window to the output - out_tile_tagged.write_band(1, in_window, window=window) - - # Without this, the untagged version is counted and eventually copied to s3 if it has data in it - os.remove(out_tile_no_tag) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, cn.pattern_burn_year, no_upload) - diff --git a/burn_date/mp_burn_year.py b/burn_date/mp_burn_year.py deleted file mode 100644 index 6149bceb..00000000 --- a/burn_date/mp_burn_year.py +++ /dev/null @@ -1,274 +0,0 @@ -''' -Creates tiles of when tree cover loss coincides with burning or preceded burning by one year. -There are four steps to this: 1) acquire raw hdfs from MODIS burned area sftp; 2) make tifs of burned area for -each year in each MODIS h-v tile; 3) make annual Hansen-style (extent, res, etc.) tiles of burned area; -4) make tiles of where TCL and burning coincided (same year or with 1 year lag). -To update this, steps 1-3 can be run on only the latest year of MODIS burned area product. Only step 4 needs to be run -on the entire time series. That is, steps 1-3 operate on burned area products separately for each year, so adding -another year of data won't change steps 1-3 for preceding years. - -NOTE: The step in which hdf files are opened and converted to tifs (step 2) requires -osgeo/gdal:ubuntu-full-X.X.X Docker image (change in Dockerfile). -The "small' Docker image doesn't have an hdf driver in gdal, so it can't read -the hdf files on the ftp site. The rest of the burned area analysis can be done with a 'small' version of the Docker image -(though that would require terminating the Docker container and restarting it, which would only make sense if the -analysis was being continued later). - -Step 4 takes many hours to run, mostly because it only uses five processors since each one requires so much memory. -The other steps might take an hour or two to run. - -This is still basically as Sam Gibbes wrote it in early 2018, with file name changes and other input/output changes -by David Gibbs. The real processing code is still all by Sam's parts. 
-''' - -import multiprocessing -from functools import partial -import pandas as pd -import datetime -import glob -import shutil -import argparse -from subprocess import Popen, PIPE, STDOUT, check_call -import os -import sys -import utilities -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'burn_date')) -import stack_ba_hv -import clip_year_tiles -import hansen_burnyear_final - - -def mp_burn_year(tile_id_list, run_date = None, no_upload = None): - - os.chdir(cn.docker_base_dir) - - # If a full model run is specified, the correct set of tiles for the particular script is listed - if tile_id_list == 'all': - # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) - - uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") - - # List of output directories and output file name patterns - output_dir_list = [cn.burn_year_dir] - output_pattern_list = [cn.pattern_burn_year] - - # A date can optionally be provided by the full model script or a run of this script. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06", - "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11", - "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03", - "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08", - "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08", - "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09", - "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08", - "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07", - "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04", - "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03", - "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07", - "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03", - "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02", - "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03", - "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12", - "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10", - "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07", - "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05", - "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03", - "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02", - "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04", - "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05", - "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08", - "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", 
"h28v04", "h28v05", "h28v06", "h28v07", - "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06", - "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07", - "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08", - "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10", - "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08", - "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"] - - - # Step 1: download hdf files for relevant year(s) from sftp site. - # This only needs to be done for the most recent year of data. - - ''' - Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line. - This will download all the 2021 hdfs to the spot machine. There will be a pause of a few minutes before the first - hdf is downloaded but then it should go quickly (5 minutes for 2021 data). - Change 2021 to other year for future years of downloads. - https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3 - - Change directory to /app/burn_date/ and download hdfs into burn_date folder: - - sftp fire@fuoco.geog.umd.edu - [For password] burnt - cd data/MODIS/C6/MCD64A1/HDF - ls [to check that it's the folder with all the h-v tile folders] - get h??v??/MCD64A1.A2021* - bye //exits the stfp shell - - Before moving to the next step, confirm that all months of burned area data were downloaded. - The last month will have the format MCD64A1.A20**336.h... or so. - ''' - - - # # Uploads the latest year of raw burn area hdfs to s3. - # # All hdfs go in this folder - # cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf'] - # uu.log_subprocess_output_full(cmd) - # - # - # # Step 2: - # # Makes burned area rasters for each year for each MODIS horizontal-vertical tile. - # # This only needs to be done for the most recent year of data (set in stach_ba_hv). - # uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...") - # - # count = multiprocessing.cpu_count() - # pool = multiprocessing.Pool(processes=count - 10) - # pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv) - # pool.close() - # pool.join() - # - # # # For single processor use - # # for hv_tile in global_grid_hv: - # # stack_ba_hv.stack_ba_hv(hv_tile) - # - # - # # Step 3: - # # Creates a 10x10 degree wgs 84 tile of .00025 res burned year. - # # Downloads all MODIS hv tiles from s3, - # # makes a mosaic for each year, and warps to Hansen extent. - # # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2022 goes from 2001 to 2021). - # # This only needs to be done for the most recent year of data. - # # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3 - # # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine. 
- # - # start_year = 2000 + cn.loss_years - # end_year = 2000 + cn.loss_years + 1 - # - # # Assumes that only the last year of fires are being processed - # for year in range(start_year, end_year): - # - # uu.print_log("Processing", year) - # - # # Downloads all hv tifs for this year - # include = '{0}_*.tif'.format(year) - # year_tifs_folder = "{}_year_tifs".format(year) - # utilities.makedir(year_tifs_folder) - # - # uu.print_log("Downloading MODIS burn date files from s3...") - # - # cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder] - # cmd += ['--recursive', '--exclude', "*", '--include', include] - # uu.log_subprocess_output_full(cmd) - # - # uu.print_log("Creating vrt of MODIS files...") - # - # vrt_name = "global_vrt_{}.vrt".format(year) - # - # # Builds list of vrt files - # with open('vrt_files.txt', 'w') as vrt_files: - # vrt_tifs = glob.glob(year_tifs_folder + "/*.tif") - # for tif in vrt_tifs: - # vrt_files.write(tif + "\n") - # - # # Creates vrt with wgs84 MODIS tiles. - # cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name] - # uu.log_subprocess_output_full(cmd) - # - # uu.print_log("Reprojecting vrt...") - # - # # Builds new vrt and virtually project it - # # This reprojection could be done as part of the clip_year_tiles function but Sam had it out here like this and - # # so I'm leaving it like that. - # vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year) - # cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), - # '-overwrite', vrt_name, vrt_wgs84] - # uu.log_subprocess_output_full(cmd) - # - # # Creates a list of lists, with year and tile id to send to multi processor - # tile_year_list = [] - # for tile_id in tile_id_list: - # tile_year_list.append([tile_id, year]) - # - # # Given a list of tiles and years ['00N_000E', 2017] and a VRT of burn data, - # # the global vrt has pixels representing burned or not. This process clips the global VRT - # # and changes the pixel value to represent the year the pixel was burned. Each tile has value of - # # year burned and NoData. - # count = multiprocessing.cpu_count() - # pool = multiprocessing.Pool(processes=count-5) - # pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list) - # pool.close() - # pool.join() - # - # # # For single processor use - # # for tile_year in tile_year_list: - # # clip_year_tiles.clip_year_tiles(tile_year, no_upload) - # - # uu.print_log("Processing for {} done. Moving to next year.".format(year)) - - - # Step 4: - # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss - # or preceded TCL by one year. - # This needs to be done on all years each time burned area is updated. - - # Downloads the loss tiles. 
The step 3 burn year tiles are downloaded within hansen_burnyear - uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss) - - uu.print_log("Extracting burn year data that coincides with tree cover loss...") - - # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stack and evaluate - # to return burn year values on hansen loss pixels within 1 year of loss date - if cn.count == 96: - processes = 5 - # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory) - else: - processes = 1 - pool = multiprocessing.Pool(processes) - pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # hansen_burnyear_final.hansen_burnyear(tile_id, no_upload) - - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) - - -if __name__ == '__main__': - - # The arguments for what kind of model run is being run (standard conditions or a sensitivity analysis) and - # the tiles to include - parser = argparse.ArgumentParser( - description='Creates tiles of the year in which pixels were burned') - parser.add_argument('--tile_id_list', '-l', required=True, - help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--run-date', '-d', required=False, - help='Date of run. Must be format YYYYMMDD.') - parser.add_argument('--no-upload', '-nu', action='store_true', - help='Disables uploading of outputs to s3') - args = parser.parse_args() - tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload - - # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type='std', run_date=run_date, no_upload=no_upload) - - # Checks whether the tile_id_list argument is valid - tile_id_list = uu.tile_id_list_check(tile_id_list) - - mp_burn_year(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/burn_date/stack_ba_hv.py b/burn_date/stack_ba_hv.py deleted file mode 100644 index a49358b0..00000000 --- a/burn_date/stack_ba_hv.py +++ /dev/null @@ -1,53 +0,0 @@ -from subprocess import Popen, PIPE, STDOUT, check_call -from osgeo import gdal -import utilities -import glob -import shutil -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - - -def stack_ba_hv(hv_tile): - - start_year = 2000 + cn.loss_years - end_year = 2000 + cn.loss_years + 1 - - # Assumes that only the last year of fires are being processed - for year in range(start_year, end_year): # End year is not included in burn year product - - # Download hdf files from s3 into folders by h and v - output_dir = utilities.makedir('{0}/{1}/raw/'.format(hv_tile, year)) - utilities.download_df(year, hv_tile, output_dir) - - # convert hdf to array - hdf_files = glob.glob(output_dir + "*hdf") - - if len(hdf_files) > 0: - array_list = [] - for hdf in hdf_files: - array = utilities.hdf_to_array(hdf) - array_list.append(array) - - # stack arrays, get 1 raster for the year and tile - stacked_year_array = utilities.stack_arrays(array_list) - max_stacked_year_array = stacked_year_array.max(0) - - # convert stacked month arrays to 1 raster for the year - template_hdf = hdf_files[0] - - year_folder = utilities.makedir('{0}/{1}/stacked/'.format(hv_tile, year)) - - 
stacked_year_raster = utilities.array_to_raster(hv_tile, year, max_stacked_year_array, template_hdf, - year_folder) - - # upload to s3 - cmd = ['aws', 's3', 'cp', stacked_year_raster, cn.burn_year_stacked_hv_tif_dir] - uu.log_subprocess_output_full(cmd) - - # remove files - shutil.rmtree(output_dir) - - else: - pass diff --git a/burn_date/utilities.py b/burn_date/utilities.py deleted file mode 100644 index ff0b4109..00000000 --- a/burn_date/utilities.py +++ /dev/null @@ -1,142 +0,0 @@ - -import os -from subprocess import Popen, PIPE, STDOUT, check_call -import numpy as np -from osgeo import gdal -from gdalconst import GA_ReadOnly -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - - -def hdf_to_array(hdf): - hdf_open = gdal.Open(hdf).GetSubDatasets() - ds = gdal.Open(hdf_open[0][0]) - array = ds.ReadAsArray() - - return array - - -def makedir(folder): - if not os.path.exists(folder): - os.mkdir(folder) - - -def raster_to_array(raster): - ds = gdal.Open(raster) - array = np.array(ds.GetRasterBand(1).ReadAsArray()) - # array = np.array(ds.GetRasterBand(1).ReadAsArray(win_xsize = 5000, win_ysize = 5000)) # For local testing. Reading in the full array is too large. - - return array - - -def array_to_raster_simple(array, outname, template): - - ds = gdal.Open(template) - x_pixels = ds.RasterXSize - y_pixels = ds.RasterYSize - - geoTransform = ds.GetGeoTransform() - height = geoTransform[1] - - pixel_size = height - - minx = geoTransform[0] - maxy = geoTransform[3] - - wkt_projection = ds.GetProjection() - - driver = gdal.GetDriverByName('GTiff') - - dataset = driver.Create( - outname, - x_pixels, - y_pixels, - 1, - gdal.GDT_Int16, - options=["COMPRESS=LZW"]) - - dataset.SetGeoTransform(( - minx, # 0 - pixel_size, # 1 - 0, # 2 - maxy, # 3 - 0, # 4 - -pixel_size)) - - dataset.SetProjection(wkt_projection) - dataset.GetRasterBand(1).WriteArray(array) - dataset.FlushCache() # Write to disk. - - return outname - - -def array_to_raster(global_grid_hv, year, array, template_hdf, outfolder): - - filename = '{0}_{1}.tif'.format(year, global_grid_hv) - dst_filename = os.path.join(outfolder, filename) - # x_pixels, y_pixels = get_extent.get_size(raster) - hdf_open = gdal.Open(template_hdf).GetSubDatasets() - ds = gdal.Open(hdf_open[0][0]) - x_pixels = ds.RasterXSize - y_pixels = ds.RasterYSize - - geoTransform = ds.GetGeoTransform() - - pixel_size = geoTransform[1] - - minx = geoTransform[0] - maxy = geoTransform[3] - - wkt_projection = ds.GetProjection() - - driver = gdal.GetDriverByName('GTiff') - - dataset = driver.Create( - dst_filename, - x_pixels, - y_pixels, - 1, - gdal.GDT_Int16, ) - - dataset.SetGeoTransform(( - minx, # 0 - pixel_size, # 1 - 0, # 2 - maxy, # 3 - 0, # 4 - -pixel_size)) - - dataset.SetProjection(wkt_projection) - dataset.GetRasterBand(1).WriteArray(array) - dataset.FlushCache() # Write to disk. 
- - return dst_filename - - -def stack_arrays(list_of_year_arrays): - - stack = np.stack(list_of_year_arrays) - - return stack - - -def makedir(dir): - if not os.path.exists(dir): - os.makedirs(dir) - - return dir - - -def download_df(year, hv_tile, output_dir): - include = 'MCD64A1.A{0}*{1}*'.format(year, hv_tile) - cmd = ['aws', 's3', 'cp', cn.burn_year_hdf_raw_dir, output_dir, '--recursive', '--exclude', - "*", '--include', include] - - # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging - process = Popen(cmd, stdout=PIPE, stderr=STDOUT) - with process.stdout: - uu.log_subprocess_output(process.stdout) - - diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index bd2435c9..a4d27ce6 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py @@ -1,16 +1,47 @@ +"""Functions to create carbon pools (Mg C/ha)""" + import datetime -import sys -import pandas as pd import os -import numpy as np import rasterio -sys.path.append('../') +import numpy as np +import pandas as pd +from memory_profiler import profile + import constants_and_names as cn import universal_util as uu +def prepare_gain_table(): + """ + Loads the mangrove gain rate spreadsheet and turns it into a Pandas table + :return: Pandas table of removal factors for mangroves + """ + + # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates + # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] + uu.log_subprocess_output_full(cmd) + + pd.options.mode.chained_assignment = None + + # Imports the table with the ecozone-continent codes and the carbon removals rates + gain_table = pd.read_excel(f'{cn.docker_tile_dir}{cn.gain_spreadsheet}', + sheet_name="mangrove gain, for model") + + # Removes rows with duplicate codes (N. and S. America for the same ecozone) + gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') + + return gain_table_simplified + -# Creates a dictionary of biomass in belowground, deadwood, and litter emitted_pools to aboveground biomass pool def mangrove_pool_ratio_dict(gain_table_simplified, tropical_dry, tropical_wet, subtropical): + """ + Creates a dictionary of biomass in belowground, deadwood, and litter emitted_pools to aboveground biomass pool + :param gain_table_simplified: Table of removal factors for mangroves + :param tropical_dry: Belowground:aboveground biomass ratio for tropical dry mangroves + :param tropical_wet: Belowground:aboveground biomass ratio for tropical wet mangroves + :param subtropical: Belowground:aboveground biomass ratio for subtropical mangroves + :return: BGB:AGB ratio for mangroves + """ # Creates x_pool:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to # the "mangType" field in the removals rate spreadsheet. 
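# Illustrative sketch (added for clarity, not part of the module): how the two helpers above might be
# wired together in the calling script. The ratio arguments are placeholders, not verified names; the
# real values come from constants_and_names.
#
#     gain_table_simplified = prepare_gain_table()
#     mang_BGB_AGB_ratio = mangrove_pool_ratio_dict(gain_table_simplified,
#                                                   tropical_dry=<dry ratio>,
#                                                   tropical_wet=<wet ratio>,
#                                                   subtropical=<subtropical ratio>)
#
# The returned dictionary maps the spreadsheet's "mangType" codes to pool:AGB ratios and is passed to
# create_BGC(); the same helper presumably builds the deadwood and litter ratio dictionaries passed to
# create_deadwood_litter().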
@@ -37,87 +68,91 @@ def mangrove_pool_ratio_dict(gain_table_simplified, tropical_dry, tropical_wet, return mang_x_pool_AGB_ratio - -# Creates aboveground carbon emitted_pools in 2000 and/or the year of loss (loss pixels only) -def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): +# @profile +def create_AGC(tile_id, carbon_pool_extent): + """ + Creates aboveground carbon emitted_pools in 2000 and/or the year of loss (loss pixels only) + :param tile_id: tile to be processed, identified by its tile id + :param carbon_pool_extent: the pixels and years for which carbon pools are caculated: loss or 2000 + :return: Aboveground carbon density in the specified pixels for the specified years (Mg C/ha) + """ # Start time start = datetime.datetime.now() # Names of the input tiles. Creates the names even if the files don't exist. - removal_forest_type = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_removal_forest_type) - mangrove_biomass_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - gain = uu.sensit_tile_rename(sensit_type, cn.pattern_gain, tile_id) - annual_gain_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_all_types) - cumul_gain_AGCO2 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cumul_gain_AGCO2_all_types) - - # Biomass tile name depends on the sensitivity analysis - if sensit_type == 'biomass_swap': - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) - else: - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) + removal_forest_type = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_removal_forest_type) + mangrove_biomass_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' + annual_gain_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_all_types) + cumul_gain_AGCO2 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cumul_gain_AGCO2_all_types) + natrl_forest_biomass_2000 = uu.sensit_tile_rename_biomass(cn.SENSIT_TYPE, tile_id) + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}...') # Loss tile name depends on the sensitivity analysis - if sensit_type == 'legal_Amazon_loss': - uu.print_log(" Brazil-specific loss tile found for {}".format(tile_id)) - loss_year = '{}_{}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - elif os.path.exists('{}_{}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed)): - uu.print_log(" Mekong-specific loss tile found for {}".format(tile_id)) - loss_year = '{}_{}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + uu.print_log(f' Brazil-specific loss tile found for {tile_id}') + loss_year = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' + elif os.path.exists(f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif'): + uu.print_log(f' Mekong-specific loss tile found for {tile_id}') + loss_year = f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif' else: - uu.print_log(" Hansen loss tile found for {}".format(tile_id)) - loss_year = 
'{0}_{1}.tif'.format(cn.pattern_loss, tile_id) + uu.print_log(f' Hansen loss tile found for {tile_id}') + loss_year = f'{cn.pattern_loss}_{tile_id}.tif' - # This input is required to exist - loss_year_src = rasterio.open(loss_year) + # Not actually used in the AGC creation but this tile should exist, so it can reliably be opened for metadata + model_extent_src = rasterio.open(model_extent) # Opens the input tiles if they exist + try: + loss_year_src = rasterio.open(loss_year) + uu.print_log(f' Loss year tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Loss year tile not found for {tile_id}') try: annual_gain_AGC_src = rasterio.open(annual_gain_AGC) - uu.print_log(" Aboveground removal factor tile found for", tile_id) - except: - uu.print_log(" No aboveground removal factor tile for", tile_id) + uu.print_log(f' Aboveground removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Aboveground removal factor tile not found for {tile_id}') try: cumul_gain_AGCO2_src = rasterio.open(cumul_gain_AGCO2) - uu.print_log(" Gross aboveground removal tile found for", tile_id) - except: - uu.print_log(" No gross aboveground removal tile for", tile_id) + uu.print_log(f' Gross aboveground removal tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gross aboveground removal tile not found for {tile_id}') try: mangrove_biomass_2000_src = rasterio.open(mangrove_biomass_2000) - uu.print_log(" Mangrove tile found for", tile_id) - except: - uu.print_log(" No mangrove tile for", tile_id) + uu.print_log(f' Mangrove tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Mangrove tile not found for {tile_id}') try: natrl_forest_biomass_2000_src = rasterio.open(natrl_forest_biomass_2000) - uu.print_log(" Biomass found for", tile_id) - except: - uu.print_log(" No biomass found for", tile_id) + uu.print_log(f' Biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Biomass tile not found for {tile_id}') try: gain_src = rasterio.open(gain) - uu.print_log(" Gain tile found for", tile_id) - except: - uu.print_log(" No gain tile found for", tile_id) + uu.print_log(f' Gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gain tile not found for {tile_id}') try: removal_forest_type_src = rasterio.open(removal_forest_type) - uu.print_log(" Removal type tile found for", tile_id) - except: - uu.print_log(" No removal type tile found for", tile_id) + uu.print_log(f' Removal type tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Removal type tile not found for {tile_id}') # Grabs the windows of a tile to iterate over the entire tif without running out of memory - windows = loss_year_src.block_windows(1) + windows = model_extent_src.block_windows(1) # Grabs metadata for one of the input tiles, like its location/projection/cellsize - kwargs = loss_year_src.meta + kwargs = model_extent_src.meta # Updates kwargs for the output dataset. # Need to update data type to float 32 so that it can handle fractional carbon @@ -132,12 +167,12 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): # The output files: aboveground carbon density in 2000 and in the year of loss. Creates names and rasters to write to. 
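# Added note: carbon_pool_extent is checked with 'in', so a run can request either extent or, presumably,
# both at once (the exact argument format comes from the calling script); when both are requested, a 2000
# raster and a loss-year raster are written in the same pass over the windows.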
if '2000' in carbon_pool_extent: output_pattern_list = [cn.pattern_AGC_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - AGC_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + AGC_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_AGC_2000 = rasterio.open(AGC_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_AGC_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_AGC_2000) dst_AGC_2000.update_tags( units='megagrams aboveground carbon (AGC)/ha') dst_AGC_2000.update_tags( @@ -146,12 +181,12 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): extent='aboveground biomass in 2000 (WHRC if standard model, JPL if biomass_swap sensitivity analysis) and mangrove AGB. Mangrove AGB has precedence.') if 'loss' in carbon_pool_extent: output_pattern_list = [cn.pattern_AGC_emis_year] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - AGC_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + AGC_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif' dst_AGC_emis_year = rasterio.open(AGC_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_AGC_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_AGC_emis_year) dst_AGC_emis_year.update_tags( units='megagrams aboveground carbon (AGC)/ha') dst_AGC_emis_year.update_tags( @@ -160,7 +195,7 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): extent='tree cover loss pixels within model extent') - uu.print_log(" Creating aboveground carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent)) + uu.print_log(f' Creating aboveground carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}') uu.check_memory() @@ -168,30 +203,33 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): for idx, window in windows: # Reads the input tiles' windows. For windows from tiles that may not exist, an array of all 0s is created. 
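# Added note on the try/except changes below: when rasterio.open() fails above, the corresponding
# *_src variable is never assigned, so reading from it here raises UnboundLocalError rather than an
# I/O error. Catching UnboundLocalError (instead of a bare except) therefore substitutes an all-zero
# window only for inputs that genuinely were not opened, without hiding unrelated errors.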
- loss_year_window = loss_year_src.read(1, window=window) + try: + loss_year_window = loss_year_src.read(1, window=window) + except UnboundLocalError: + loss_year_window = np.zeros((window.height, window.width), dtype='uint8') try: annual_gain_AGC_window = annual_gain_AGC_src.read(1, window=window) - except: + except UnboundLocalError: annual_gain_AGC_window = np.zeros((window.height, window.width), dtype='float32') try: cumul_gain_AGCO2_window = cumul_gain_AGCO2_src.read(1, window=window) - except: + except UnboundLocalError: cumul_gain_AGCO2_window = np.zeros((window.height, window.width), dtype='float32') try: removal_forest_type_window = removal_forest_type_src.read(1, window=window) - except: + except UnboundLocalError: removal_forest_type_window = np.zeros((window.height, window.width), dtype='uint8') try: gain_window = gain_src.read(1, window=window) - except: + except UnboundLocalError: gain_window = np.zeros((window.height, window.width), dtype='uint8') try: mangrove_biomass_2000_window = mangrove_biomass_2000_src.read(1, window=window) - except: + except UnboundLocalError: mangrove_biomass_2000_window = np.zeros((window.height, window.width), dtype='uint8') try: natrl_forest_biomass_2000_window = natrl_forest_biomass_2000_src.read(1, window=window) - except: + except UnboundLocalError: natrl_forest_biomass_2000_window = np.zeros((window.height, window.width), dtype='uint8') @@ -214,7 +252,7 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): agc_2000_model_extent_window = np.where(removal_forest_type_window > 0, agc_2000_window, 0) # print(agc_2000_model_extent_window[0][0:5]) - # Creates a mask based on whether the pixels had loss and gain in them. Loss&gain pixels are 1, all else are 0. + # Creates a mask based on whether the pixels had loss-and-gain in them. Loss&gain pixels are 1, all else are 0. # This is used to determine how much post-2000 carbon removals to add to AGC2000 pixels. 
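# Added worked example of the masking below: np.ma.masked_where(loss_year_window == 0, gain_window)
# masks the gain values wherever there was no loss, and .filled(0) turns those masked entries into 0.
# For instance, loss_year_window = [0, 3, 7] with gain_window = [1, 1, 0] yields [0, 1, 0]: only pixels
# with both a loss year and gain keep the gain flag.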
loss_gain_mask = np.ma.masked_where(loss_year_window == 0, gain_window).filled(0) @@ -254,34 +292,41 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_emis_year, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_emis_year) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_AGC_2000) -# Creates belowground carbon tiles (both in 2000 and loss year) -def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload): +def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent): + """ + Creates belowground carbon tiles (both in 2000 and loss year) + :param tile_id: tile to be processed, identified by its tile id + :param mang_BGB_AGB_ratio: BGB:AGB ratio for mangroves + :param carbon_pool_extent: carbon_pool_extent: the pixels and years for which carbon pools are caculated: loss or 2000 + :return: Belowground carbon density in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() # Names of the input tiles - removal_forest_type = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_removal_forest_type) - cont_ecozone = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) + removal_forest_type = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_removal_forest_type) + cont_ecozone = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) + BGB_AGB_ratio = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGB_AGB_ratio) # For BGC 2000, opens AGC, names the output tile, creates the output tile if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - BGC_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + BGC_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_BGC_2000 = rasterio.open(BGC_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_BGC_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_BGC_2000) dst_BGC_2000.update_tags( units='megagrams belowground carbon (BGC)/ha') dst_BGC_2000.update_tags( @@ -291,18 +336,19 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ # For BGC in emissions year, opens AGC, names the output tile, creates the output tile if 'loss' in carbon_pool_extent: - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) + + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) AGC_emis_year_src = rasterio.open(AGC_emis_year) kwargs = AGC_emis_year_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_emis_year] - if sensit_type != 'std': - output_pattern_list = 
uu.alter_patterns(sensit_type, output_pattern_list) - BGC_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + BGC_emis_year = uu.make_tile_name(tile_id, output_pattern_list[0]) dst_BGC_emis_year = rasterio.open(BGC_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_BGC_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_BGC_emis_year) dst_BGC_emis_year.update_tags( units='megagrams belowground carbon (BGC)/ha') dst_BGC_emis_year.update_tags( @@ -310,23 +356,28 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ dst_BGC_emis_year.update_tags( extent='tree cover loss pixels within model extent') - - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}') # Opens inputs that are used regardless of whether calculating BGC2000 or BGC in emissions year try: cont_ecozone_src = rasterio.open(cont_ecozone) - uu.print_log(" Continent-ecozone tile found for", tile_id) - except: - uu.print_log(" No Continent-ecozone tile found for", tile_id) + uu.print_log(f' Continent-ecozone tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Continent-ecozone tile not found for {tile_id}') try: removal_forest_type_src = rasterio.open(removal_forest_type) - uu.print_log(" Removal forest type tile found for", tile_id) - except: - uu.print_log(" No Removal forest type tile found for", tile_id) + uu.print_log(f' Removal forest type tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Removal forest type tile not found for {tile_id}') + + try: + BGB_AGB_ratio_src = rasterio.open(BGB_AGB_ratio) + uu.print_log(f' BGB:AGB tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' BGB:AGB tile not found for {tile_id}. 
Using default BGB:AGB from Mokany instead.') - uu.print_log(" Creating belowground carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent)) + uu.print_log(f' Creating belowground carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}') uu.check_memory() @@ -336,14 +387,20 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ # Creates windows from inputs that are used regardless of whether calculating BGC2000 or BGC in emissions year try: cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32') try: removal_forest_type_window = removal_forest_type_src.read(1, window=window) - except: + except UnboundLocalError: removal_forest_type_window = np.zeros((window.height, window.width)) + try: + BGB_AGB_ratio_window = BGB_AGB_ratio_src.read(1, window=window) + except UnboundLocalError: + BGB_AGB_ratio_window = np.empty((window.height, window.width), dtype='float32') + BGB_AGB_ratio_window[:] = cn.below_to_above_non_mang + # Applies the mangrove BGB:AGB ratios (3 different ratios) to the ecozone raster to create a raster of BGB:AGB ratios for key, value in mang_BGB_AGB_ratio.items(): cont_ecozone_window[cont_ecozone_window == key] = value @@ -355,7 +412,7 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ # Applies mangrove-specific AGB:BGB ratios by ecozone (ratio applies to AGC:BGC as well) mangrove_BGC_2000 = np.where(removal_forest_type_window == cn.mangrove_rank, AGC_2000_window * cont_ecozone_window, 0) # Applies non-mangrove AGB:BGB ratio to all non-mangrove pixels - non_mangrove_BGC_2000 = np.where(removal_forest_type_window != cn.mangrove_rank, AGC_2000_window * cn.below_to_above_non_mang, 0) + non_mangrove_BGC_2000 = np.where(removal_forest_type_window != cn.mangrove_rank, AGC_2000_window * BGB_AGB_ratio_window, 0) # Combines mangrove and non-mangrove pixels BGC_2000_window = mangrove_BGC_2000 + non_mangrove_BGC_2000 @@ -366,7 +423,7 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ AGC_emis_year_window = AGC_emis_year_src.read(1, window=window) mangrove_BGC_emis_year = np.where(removal_forest_type_window == cn.mangrove_rank, AGC_emis_year_window * cont_ecozone_window, 0) - non_mangrove_BGC_emis_year = np.where(removal_forest_type_window != cn.mangrove_rank, AGC_emis_year_window * cn.below_to_above_non_mang, 0) + non_mangrove_BGC_emis_year = np.where(removal_forest_type_window != cn.mangrove_rank, AGC_emis_year_window * BGB_AGB_ratio_window, 0) BGC_emis_year_window = mangrove_BGC_emis_year + non_mangrove_BGC_emis_year dst_BGC_emis_year.write_band(1, BGC_emis_year_window, window=window) @@ -374,45 +431,47 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_emis_year, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_emis_year) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_BGC_2000) -# Creates deadwood and litter carbon tiles (in 2000 and/or in loss year) -def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload): +def create_deadwood_litter(tile_id, 
mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent): + """ + Creates deadwood and litter carbon tiles using AGC in 2000 (with loss extent or 2000 forest extent) + :param tile_id: tile to be processed, identified by its tile id + :param mang_deadwood_AGB_ratio: ratio of deadwood carbon to aboveground carbon for mangroves + :param mang_litter_AGB_ratio: ratio of litter carbon to aboveground carbon for mangroves + :param carbon_pool_extent: the pixels and years for which carbon pools are caculated: loss or 2000 + :return: Deadwood and litter carbon density tiles in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() # Names of the input tiles. Creates the names even if the files don't exist. - mangrove_biomass_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - bor_tem_trop = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_bor_tem_trop_processed) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) - precip = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_precip) - elevation = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_elevation) - if sensit_type == 'biomass_swap': - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) - else: - natrl_forest_biomass_2000 = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) + mangrove_biomass_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + bor_tem_trop = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_bor_tem_trop_processed) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) + precip = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_precip) + elevation = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_elevation) + natrl_forest_biomass_2000 = uu.sensit_tile_rename_biomass(cn.SENSIT_TYPE, tile_id) # For deadwood and litter 2000, opens AGC, names the output tiles, creates the output tiles if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_deadwood_2000, cn.pattern_litter_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - deadwood_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - litter_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + deadwood_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' + litter_2000 = f'{tile_id}_{output_pattern_list[1]}.tif' dst_deadwood_2000 = rasterio.open(deadwood_2000, 'w', **kwargs) dst_litter_2000 = rasterio.open(litter_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_deadwood_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_deadwood_2000) dst_deadwood_2000.update_tags( units='megagrams deadwood carbon/ha') dst_deadwood_2000.update_tags( @@ -420,7 +479,7 
@@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat dst_deadwood_2000.update_tags( extent='aboveground biomass in 2000 (WHRC if standard model, JPL if biomass_swap sensitivity analysis) and mangrove AGB. Mangrove AGB has precedence.') # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_litter_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_litter_2000) dst_litter_2000.update_tags( units='megagrams litter carbon/ha') dst_litter_2000.update_tags( @@ -430,21 +489,21 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # For deadwood and litter in emissions year, opens AGC, names the output tiles, creates the output tiles if 'loss' in carbon_pool_extent: - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) AGC_emis_year_src = rasterio.open(AGC_emis_year) kwargs = AGC_emis_year_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - deadwood_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - litter_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + deadwood_emis_year = uu.make_tile_name(tile_id, output_pattern_list[0]) + litter_emis_year = uu.make_tile_name(tile_id, output_pattern_list[1]) dst_deadwood_emis_year = rasterio.open(deadwood_emis_year, 'w', **kwargs) dst_litter_emis_year = rasterio.open(litter_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_deadwood_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_deadwood_emis_year) dst_deadwood_emis_year.update_tags( units='megagrams deadwood carbon/ha') dst_deadwood_emis_year.update_tags( @@ -452,7 +511,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat dst_deadwood_emis_year.update_tags( extent='tree cover loss pixels within model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_litter_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_litter_emis_year) dst_litter_emis_year.update_tags( units='megagrams litter carbon/ha') dst_litter_emis_year.update_tags( @@ -460,49 +519,49 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat dst_litter_emis_year.update_tags( extent='tree cover loss pixels within model extent') - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}') try: precip_src = rasterio.open(precip) - uu.print_log(" Precipitation tile found for", tile_id) - except: - uu.print_log(" No precipitation tile biomass for", tile_id) + uu.print_log(f' Precipitation tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Precipitation tile not found for {tile_id}') try: elevation_src = rasterio.open(elevation) - uu.print_log(" Elevation tile found for", tile_id) - except: - uu.print_log(" No elevation tile biomass for", tile_id) + uu.print_log(f' Elevation tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Elevation tile not found for 
{tile_id}') # Opens the mangrove biomass tile if it exists try: bor_tem_trop_src = rasterio.open(bor_tem_trop) - uu.print_log(" Boreal/temperate/tropical tile found for", tile_id) - except: - uu.print_log(" No boreal/temperate/tropical tile biomass for", tile_id) + uu.print_log(f' Boreal/temperate/tropical tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Boreal/temperate/tropical tile not found for {tile_id}') # Opens the mangrove biomass tile if it exists try: mangrove_biomass_2000_src = rasterio.open(mangrove_biomass_2000) - uu.print_log(" Mangrove biomass found for", tile_id) - except: - uu.print_log(" No mangrove biomass for", tile_id) + uu.print_log(f' Mangrove biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Mangrove biomass tile not found for {tile_id}') # Opens the WHRC/JPL biomass tile if it exists try: natrl_forest_biomass_2000_src = rasterio.open(natrl_forest_biomass_2000) - uu.print_log(" Biomass found for", tile_id) - except: - uu.print_log(" No biomass for", tile_id) + uu.print_log(f' Biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Biomass tile not found for {tile_id}') # Opens the continent-ecozone tile if it exists try: cont_ecozone_src = rasterio.open(cont_eco) - uu.print_log(" Continent-ecozone tile found for", tile_id) - except: - uu.print_log(" No Continent-ecozone tile found for", tile_id) + uu.print_log(f' Continent-ecozone tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Continent-ecozone tile not found for {tile_id}') - uu.print_log(" Creating deadwood and litter carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent)) + uu.print_log(f' Creating deadwood and litter carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}') uu.check_memory() @@ -521,27 +580,27 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # # clipping to AGC2000; I'm doing that just as a formality. It feels more complete. 
# try: # AGC_2000_window = AGC_2000_src.read(1, window=window) - # except: + # except UnboundLocalError: # AGC_2000_window = np.zeros((window.height, window.width), dtype='float32') try: AGC_emis_year_window = AGC_emis_year_src.read(1, window=window) - except: + except UnboundLocalError: AGC_emis_year_window = np.zeros((window.height, window.width), dtype='float32') try: cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32') try: bor_tem_trop_window = bor_tem_trop_src.read(1, window=window) - except: + except UnboundLocalError: bor_tem_trop_window = np.zeros((window.height, window.width)) try: precip_window = precip_src.read(1, window=window) - except: + except UnboundLocalError: precip_window = np.zeros((window.height, window.width)) try: elevation_window = elevation_src.read(1, window=window) - except: + except UnboundLocalError: elevation_window = np.zeros((window.height, window.width)) # This allows the script to bypass the few tiles that have mangrove biomass but not WHRC biomass @@ -550,67 +609,9 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Reads in the windows of each input file that definitely exist natrl_forest_biomass_window = natrl_forest_biomass_2000_src.read(1, window=window) - # The deadwood and litter conversions generally come from here: https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf, p. 17-18 - # They depend on the elevation, precipitation, and broad biome category (boreal/temperate/tropical). - # For some reason, the masks need to be named different variables for each equation. - # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 - # equation won't work properly.) 
- - # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) - elev_mask_1 = elevation_window <= 2000 - precip_mask_1 = precip_window <= 1000 - ecozone_mask_1 = bor_tem_trop_window == 1 - condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 - agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1)) - deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - - # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) - elev_mask_2 = elevation_window <= 2000 - precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) - ecozone_mask_2 = bor_tem_trop_window == 1 - condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 - agb_masked_2 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_2)) - deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) - elev_mask_3 = elevation_window <= 2000 - precip_mask_3 = precip_window > 1600 - ecozone_mask_3 = bor_tem_trop_window == 1 - condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 - agb_masked_3 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_3)) - deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_3 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) - elev_mask_4 = elevation_window > 2000 - ecozone_mask_4 = bor_tem_trop_window == 1 - condition_mask_4 = elev_mask_4 & ecozone_mask_4 - agb_masked_4 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_4)) - deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_4 * 0.01 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) - ecozone_mask_5 = bor_tem_trop_window != 1 - condition_mask_5 = ecozone_mask_5 - agb_masked_5 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_5)) - deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove - deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) - litter_masked = agb_masked_5 * 0.04 * cn.biomass_to_c_non_mangrove_litter - litter_2000_output = litter_2000_output + litter_masked.filled(0) - - deadwood_2000_output = deadwood_2000_output.astype('float32') - litter_2000_output = litter_2000_output.astype('float32') + deadwood_2000_output, litter_2000_output = deadwood_litter_equations( + bor_tem_trop_window, deadwood_2000_output, elevation_window, + litter_2000_output, natrl_forest_biomass_window, precip_window) # Replaces non-mangrove deadwood and litter with special 
mangrove deadwood and litter values if there is mangrove if os.path.exists(mangrove_biomass_2000): @@ -641,7 +642,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Same as above but for litter try: cont_ecozone_window = cont_ecozone_src.read(1, window=window).astype('float32') - except: + except UnboundLocalError: cont_ecozone_window = np.zeros((window.height, window.width), dtype='float32') # Applies the mangrove deadwood:AGB ratios (2 different ratios) to the ecozone raster to create a raster of deadwood:AGB ratios @@ -681,29 +682,109 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_emis_year_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_emis_year_2000) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000, no_upload) - - -# Creates soil carbon tiles in loss pixels only -def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): + uu.end_of_fx_summary(start, tile_id, cn.pattern_deadwood_2000) + + +def deadwood_litter_equations(bor_tem_trop_window, deadwood_2000_output, elevation_window, litter_2000_output, + natrl_forest_biomass_window, precip_window): + """ + :param bor_tem_trop_window: array representing boreal, temperate or tropical climate domains + :param deadwood_2000_output: array representing the deadwood output + :param elevation_window: array representing elevation + :param litter_2000_output: array representing litter output + :param natrl_forest_biomass_window: array representing aboveground biomass + :param precip_window: array representing annual precipitation + :return: arrays of deadwood and litter carbon + """ + + # The deadwood and litter conversions generally come from here: https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf, p. 17-18 + # They depend on the elevation, precipitation, and climate domain (boreal/temperate/tropical). + # For some reason, the masks need to be named different variables for each equation. + # If they all have the same name (e.g., elev_mask and condition_mask are reused), then at least the condition_mask_4 + # equation won't work properly.) 
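# Added summary of the fractions applied in the five masked equations below (shares of AGB, before the
# biomass-to-carbon conversion):
#   tropical, elevation <= 2000, precip <= 1000:        deadwood 2%, litter 4%
#   tropical, elevation <= 2000, 1000 < precip <= 1600: deadwood 1%, litter 1%
#   tropical, elevation <= 2000, precip > 1600:         deadwood 6%, litter 1%
#   tropical, elevation > 2000, any precip:             deadwood 7%, litter 1%
#   boreal/temperate, any elevation and precip:         deadwood 8%, litter 4%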
+ + # Equation for elevation <= 2000, precip <= 1000, bor/temp/trop = 1 (tropical) + elev_mask_1 = elevation_window <= 2000 + precip_mask_1 = precip_window <= 1000 + ecozone_mask_1 = bor_tem_trop_window == 1 + condition_mask_1 = elev_mask_1 & precip_mask_1 & ecozone_mask_1 + agb_masked_1 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_1)) + deadwood_masked = agb_masked_1 * 0.02 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_1 * 0.04 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + + # Equation for elevation <= 2000, 1000 < precip <= 1600, bor/temp/trop = 1 (tropical) + elev_mask_2 = elevation_window <= 2000 + precip_mask_2 = (precip_window > 1000) & (precip_window <= 1600) + ecozone_mask_2 = bor_tem_trop_window == 1 + condition_mask_2 = elev_mask_2 & precip_mask_2 & ecozone_mask_2 + agb_masked_2 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_2)) + deadwood_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_2 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + + # Equation for elevation <= 2000, precip > 1600, bor/temp/trop = 1 (tropical) + elev_mask_3 = elevation_window <= 2000 + precip_mask_3 = precip_window > 1600 + ecozone_mask_3 = bor_tem_trop_window == 1 + condition_mask_3 = elev_mask_3 & precip_mask_3 & ecozone_mask_3 + agb_masked_3 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_3)) + deadwood_masked = agb_masked_3 * 0.06 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_3 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + + # Equation for elevation > 2000, precip = any value, bor/temp/trop = 1 (tropical) + elev_mask_4 = elevation_window > 2000 + ecozone_mask_4 = bor_tem_trop_window == 1 + condition_mask_4 = elev_mask_4 & ecozone_mask_4 + agb_masked_4 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_4)) + deadwood_masked = agb_masked_4 * 0.07 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_4 * 0.01 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + + # Equation for elevation = any value, precip = any value, bor/temp/trop = 2 or 3 (boreal or temperate) + ecozone_mask_5 = bor_tem_trop_window != 1 + condition_mask_5 = ecozone_mask_5 + agb_masked_5 = np.ma.array(natrl_forest_biomass_window, mask=np.invert(condition_mask_5)) + deadwood_masked = agb_masked_5 * 0.08 * cn.biomass_to_c_non_mangrove + deadwood_2000_output = deadwood_2000_output + deadwood_masked.filled(0) + litter_masked = agb_masked_5 * 0.04 * cn.biomass_to_c_non_mangrove_litter + litter_2000_output = litter_2000_output + litter_masked.filled(0) + deadwood_2000_output = deadwood_2000_output.astype('float32') + litter_2000_output = litter_2000_output.astype('float32') + + return deadwood_2000_output, litter_2000_output + + +def create_soil_emis_extent(tile_id, pattern): + """ + Creates soil carbon tiles in loss pixels only + :param tile_id: tile to be processed, identified by its tile id + :param pattern: tile pattern to be 
processed + :return: Soil organic carbon density tile in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() # Names of the input tiles. Creates the names even if the files don't exist. - soil_full_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_full_extent_2000) - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) + soil_full_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_full_extent_2000) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) if os.path.exists(soil_full_extent) & os.path.exists(AGC_emis_year): - uu.print_log("Soil C 2000 and loss found for {}. Proceeding with soil C in loss extent.".format(tile_id)) + uu.print_log(f'Soil C 2000 and loss found for {tile_id}. Proceeding with soil C in loss extent.') else: - return uu.print_log("Soil C 2000 and/or loss not found for {}. Skipping soil C in loss extent.".format(tile_id)) + return uu.print_log(f'Soil C 2000 and/or loss not found for {tile_id}. Skipping soil C in loss extent.') # Name of output tile - soil_emis_year = '{0}_{1}.tif'.format(tile_id, pattern) + soil_emis_year = uu.make_tile_name(tile_id, pattern) - uu.print_log(" Reading input files for {}...".format(tile_id)) + uu.print_log(f' Reading input files for {tile_id}...') # Both of these tiles should exist and thus be able to be opened soil_full_extent_src = rasterio.open(soil_full_extent) @@ -728,7 +809,7 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): dst_soil_emis_year = rasterio.open(soil_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_soil_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_soil_emis_year) dst_soil_emis_year.update_tags( units='megagrams soil carbon/ha') dst_soil_emis_year.update_tags( @@ -736,7 +817,7 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): dst_soil_emis_year.update_tags( extent='tree cover loss pixels') - uu.print_log(" Creating soil carbon density for loss pixels in {}...".format(tile_id)) + uu.print_log(f' Creating soil carbon density for loss pixels in {tile_id}...') uu.check_memory() @@ -758,11 +839,16 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): dst_soil_emis_year.write_band(1, soil_output, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) + uu.end_of_fx_summary(start, tile_id, pattern) -# Creates total carbon tiles (both in 2000 and loss year) -def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): +def create_total_C(tile_id, carbon_pool_extent): + """ + Creates total carbon tiles (both in 2000 and loss year) + :param tile_id: tile to be processed, identified by its tile id + :param carbon_pool_extent: the pixels and years for which carbon pools are caculated: loss or 2000 + :return: Total carbon density tile in the specified pixels for the specified years (Mg C/ha) + """ start = datetime.datetime.now() @@ -772,31 +858,31 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): # If litter in 2000 is being created, is uses the 2000 AGC tile. # The other inputs tiles aren't affected by whether the output is for 2000 or for the loss year. 
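# Added note: soil carbon is the only input opened inside a try/except in create_total_C, so a tile with
# no soil C raster still produces a total carbon output; the missing pool simply contributes zero (not
# NoData) to the per-window sum of AGC + BGC + deadwood + litter + soil.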
if '2000' in carbon_pool_extent: - AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) - BGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_BGC_2000) - deadwood_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_deadwood_2000) - litter_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_litter_2000) - soil_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_full_extent_2000) + AGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_2000) + BGC_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGC_2000) + deadwood_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_deadwood_2000) + litter_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_litter_2000) + soil_2000 = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_full_extent_2000) AGC_2000_src = rasterio.open(AGC_2000) BGC_2000_src = rasterio.open(BGC_2000) deadwood_2000_src = rasterio.open(deadwood_2000) litter_2000_src = rasterio.open(litter_2000) try: soil_2000_src = rasterio.open(soil_2000) - uu.print_log(" Soil C 2000 tile found for", tile_id) - except: - uu.print_log(" No soil C 2000 tile found for", tile_id) + uu.print_log(f' Soil C 2000 tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Soil C 2000 tile not found for {tile_id}') kwargs = AGC_2000_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_2000] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - total_C_2000 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + total_C_2000 = f'{tile_id}_{output_pattern_list[0]}.tif' dst_total_C_2000 = rasterio.open(total_C_2000, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_total_C_2000, sensit_type) + uu.add_universal_metadata_rasterio(dst_total_C_2000) dst_total_C_2000.update_tags( units='megagrams total (all emitted_pools) carbon/ha') dst_total_C_2000.update_tags( @@ -806,31 +892,31 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): if 'loss' in carbon_pool_extent: - AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) - BGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_BGC_emis_year) - deadwood_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_deadwood_emis_year_2000) - litter_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_litter_emis_year_2000) - soil_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_soil_C_emis_year_2000) + AGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_AGC_emis_year) + BGC_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGC_emis_year) + deadwood_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_deadwood_emis_year_2000) + litter_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_litter_emis_year_2000) + soil_emis_year = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_soil_C_emis_year_2000) AGC_emis_year_src = rasterio.open(AGC_emis_year) BGC_emis_year_src = rasterio.open(BGC_emis_year) deadwood_emis_year_src = rasterio.open(deadwood_emis_year) litter_emis_year_src = rasterio.open(litter_emis_year) 
try: soil_emis_year_src = rasterio.open(soil_emis_year) - uu.print_log(" Soil C emission year tile found for", tile_id) - except: - uu.print_log(" No soil C emission year tile found for", tile_id) + uu.print_log(f' Soil C emission year tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Soil C emission year tile not found for {tile_id}') kwargs = AGC_emis_year_src.meta kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_emis_year] - if sensit_type != 'std': - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) - total_C_emis_year = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) + if cn.SENSIT_TYPE != 'std': + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) + total_C_emis_year = f'{tile_id}_{output_pattern_list[0]}.tif' dst_total_C_emis_year = rasterio.open(total_C_emis_year, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_total_C_emis_year, sensit_type) + uu.add_universal_metadata_rasterio(dst_total_C_emis_year) dst_total_C_emis_year.update_tags( units='megagrams total (all emitted_pools) carbon/ha') dst_total_C_emis_year.update_tags( @@ -839,7 +925,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): extent='tree cover loss pixels within model extent') - uu.print_log(" Creating total carbon density for {0} using carbon_pool_extent '{1}'...".format(tile_id, carbon_pool_extent)) + uu.print_log(f' Creating total carbon density for {tile_id} using carbon_pool_extent {carbon_pool_extent}...') uu.check_memory() @@ -855,7 +941,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): litter_2000_window = litter_2000_src.read(1, window=window) try: soil_2000_window = soil_2000_src.read(1, window=window) - except: + except UnboundLocalError: soil_2000_window = np.zeros((window.height, window.width)) total_C_2000_window = AGC_2000_window + BGC_2000_window + deadwood_2000_window + litter_2000_window + soil_2000_window @@ -876,7 +962,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): litter_emis_year_window = litter_emis_year_src.read(1, window=window) try: soil_emis_year_window = soil_emis_year_src.read(1, window=window) - except: + except UnboundLocalError: soil_emis_year_window = np.zeros((window.height, window.width)) total_C_emis_year_window = AGC_emis_year_window + BGC_emis_year_window + deadwood_emis_year_window + litter_emis_year_window + soil_emis_year_window @@ -890,6 +976,6 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): # Prints information about the tile that was just processed if 'loss' in carbon_pool_extent: - uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_emis_year, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_emis_year) else: - uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_2000, no_upload) + uu.end_of_fx_summary(start, tile_id, cn.pattern_total_C_2000) diff --git a/carbon_pools/create_soil_C.py b/carbon_pools/create_soil_C.py index b022b800..b2649408 100644 --- a/carbon_pools/create_soil_C.py +++ b/carbon_pools/create_soil_C.py @@ -14,12 +14,10 @@ ''' import datetime -from subprocess import Popen, PIPE, STDOUT, check_call import numpy as np import rasterio import os -import sys -sys.path.append('../') + import universal_util as uu import constants_and_names as cn @@ -54,7 +52,7 @@ def 
create_mangrove_soil_C(tile_id, no_upload): else: - uu.print_log("No mangrove aboveground biomass tile for", tile_id) + uu.print_log("Mangrove aboveground biomass tile not found for", tile_id) # Prints information about the tile that was just processed uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove', no_upload) @@ -112,7 +110,7 @@ def create_combined_soil_C(tile_id, no_upload): else: - uu.print_log("No mangrove aboveground biomass tile for", tile_id) + uu.print_log("Mangrove aboveground biomass tile not found for", tile_id) # If there is no mangrove soil C tile, the final output of the mineral soil function needs to receive the # correct final name. diff --git a/carbon_pools/mp_create_carbon_pools.py b/carbon_pools/mp_create_carbon_pools.py index e45d61c8..a9652b5b 100644 --- a/carbon_pools/mp_create_carbon_pools.py +++ b/carbon_pools/mp_create_carbon_pools.py @@ -1,4 +1,4 @@ -''' +""" This script creates carbon pools in the year of loss (emitted-year carbon) and in 2000. For the year 2000, it creates aboveground, belowground, deadwood, litter, and total carbon emitted_pools (soil is created in a separate script but is brought in to create total carbon). All but total carbon are to the extent @@ -18,53 +18,57 @@ Which carbon emitted_pools are being generated (2000 and/or loss pixels) is controlled through the command line argument --carbon-pool-extent (-ce). This extent argument determines which AGC function is used and how the outputs of the other emitted_pools' scripts are named. Carbon emitted_pools in both 2000 and in the year of loss can be created in a single run by using '2000,loss' or 'loss,2000'. -''' -import multiprocessing -import pandas as pd -from subprocess import Popen, PIPE, STDOUT, check_call -import datetime -import glob -import os +python -m carbon_pools.mp_create_carbon_pools -t std -l 00N_000E -si -nu -ce loss +python -m carbon_pools.mp_create_carbon_pools -t std -l all -si -ce loss +""" + import argparse from functools import partial +import glob +import multiprocessing +import os +import pandas as pd import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'carbon_pools')) -import create_carbon_pools +from . import create_carbon_pools -def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None, no_upload = None, - save_intermediates = None): +def mp_create_carbon_pools(tile_id_list, carbon_pool_extent): + """ + :param tile_id_list: list of tile ids to process + :param carbon_pool_extent: the pixels and years for which carbon pools are calculated: loss or 2000 + :return: set of tiles with each carbon pool density (Mg/ha): aboveground, belowground, dead wood, litter, soil, total + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) - if (sensit_type != 'std') & (carbon_pool_extent != 'loss'): - uu.exception_log(no_upload, "Sensitivity analysis run must use 'loss' extent") + if (cn.SENSIT_TYPE != 'std') & (carbon_pool_extent != 'loss'): + uu.exception_log("Sensitivity analysis run must use loss extent") # Checks the validity of the carbon_pool_extent argument if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): - uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") - + uu.exception_log('Invalid carbon_pool_extent input.
Please choose loss, 2000, loss,2000 or 2000,loss.') # If a full model run is specified, the correct set of tiles for the particular script is listed. # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant # because there must be loss pixels for emissions-year carbon pools to exist. if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'): # Lists the tiles that have both model extent and loss pixels, both being necessary precursors for emissions - model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) - loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type) - uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:") + model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=cn.SENSIT_TYPE) + loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=cn.SENSIT_TYPE) + uu.print_log('Carbon pool at emissions year is combination of model_extent and loss tiles:') tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list)) # For runs generating carbon pools in 2000, all model extent tiles are relevant. if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'): - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process' + "\n") + output_dir_list = [] output_pattern_list = [] @@ -80,6 +84,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Files to download for this script download_dict = { + cn.model_extent_dir: [cn.pattern_model_extent], cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], @@ -87,19 +92,20 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio] } # Adds the correct AGB tiles to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -116,6 +122,7 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Files to download for this script. This has the same items as the download_dict for 2000 pools plus # other tiles. 
download_dict = { + cn.model_extent_dir: [cn.pattern_model_extent], cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], @@ -123,21 +130,22 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio], cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types] } # Adds the correct AGB tiles to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -145,80 +153,72 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) else: - uu.print_log("Output directory list for standard model:", output_dir_list) + uu.print_log(f'Output directory list for standard model: {output_dir_list}') # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates - # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] - uu.log_subprocess_output_full(cmd) - - pd.options.mode.chained_assignment = None - - # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name="mangrove gain, for model") + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # Removes rows with duplicate codes (N. and S. America for the same ecozone) - gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') + # Formats the mangrove removal factor table from Excel + gain_table_simplified = create_carbon_pools.prepare_gain_table() mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, - cn.below_to_above_trop_dry_mang, - cn.below_to_above_trop_wet_mang, - cn.below_to_above_subtrop_mang) + cn.below_to_above_trop_dry_mang, + cn.below_to_above_trop_wet_mang, + cn.below_to_above_subtrop_mang) mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, - cn.deadwood_to_above_trop_dry_mang, - cn.deadwood_to_above_trop_wet_mang, - cn.deadwood_to_above_subtrop_mang) + cn.deadwood_to_above_trop_dry_mang, + cn.deadwood_to_above_trop_wet_mang, + cn.deadwood_to_above_subtrop_mang) mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, - cn.litter_to_above_trop_dry_mang, - cn.litter_to_above_trop_wet_mang, - cn.litter_to_above_subtrop_mang) - - uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent)) - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': - processes = 16 # 16 processors = XXX GB peak - else: - processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; - # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 690 GB peak (stops at 600, then increases slowly); 21 > 750 GB peak - else: # For 2000, or loss & 2000 - processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak + cn.litter_to_above_trop_dry_mang, + cn.litter_to_above_trop_wet_mang, + cn.litter_to_above_subtrop_mang) + + uu.print_log(f'Creating tiles of aboveground carbon in {carbon_pool_extent}') + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_AGC(tile_id, carbon_pool_extent) + else: - processes = 2 - uu.print_log('AGC loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_AGC, - sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 16 # 16 processors = XXX GB peak + else: + processes = 17 # 19=around 650 but increases slowly and maxes out; 17=600 GB peak + else: # For 
2000, or loss & 2000 + processes = 32 # 25=540 GB peak; 32=690 GB peak; 34=sometimes 700, sometimes 760 GB peak (too high); + # 36=760 GB peak (too high) + else: + processes = 2 + uu.print_log(f'AGC loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_AGC, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -228,46 +228,47 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles") - tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + uu.print_log(':::::Freeing up memory for belowground carbon creation; deleting unneeded tiles') + tiles_to_delete = glob.glob(f'*{cn.pattern_annual_gain_AGC_all_types}*tif') + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_all_types}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent)) - # Creates a single filename pattern to pass to the multiprocessor call - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': - processes = 30 # 30 processors = XXX GB peak - else: - processes = 39 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak - else: # For 2000, or loss & 2000 - processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak + uu.print_log(f'Creating tiles of belowground carbon in {carbon_pool_extent}') + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent) + else: - processes = 2 - uu.print_log('BGC max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, - carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if 
carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 30 # 30 processors = XXX GB peak + else: + processes = 30 # 20 processors = 370 GB peak; 32 = 590 GB peak; 33=760 BG peak (too high) + else: # For 2000, or loss & 2000 + processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30=725 GB peak; 40 = 760 GB peak (too high) + else: + processes = 2 + uu.print_log(f'BGC max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) @@ -282,55 +283,58 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. if '2000' in carbon_pool_extent: - uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000))) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_BGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gain_ec2}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_soil_C_full_extent_2000}*tif')) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent)) - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': - processes = 10 # 10 processors = XXX GB peak - else: - # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); - # 15 = 700 GB peak once but also too much memory another time, so back to 14 - processes = 14 - else: # For 2000, or loss & 2000 - ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. - ### There wouldn't have been enough room for all deadwood and litter otherwise. - ### For example, when deadwood and litter generation started getting up to around 50N, I deleted - ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. 
- processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak + uu.print_log(f'Creating tiles of deadwood and litter carbon in {carbon_pool_extent}') + + if cn.SINGLE_PROCESSOR: + # For single processor use + for tile_id in tile_id_list: + create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent) + else: - processes = 2 - uu.print_log('Deadwood and litter max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map( - partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, - mang_litter_AGB_ratio=mang_litter_AGB_ratio, - carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 10 # 10 processors = XXX GB peak + else: + # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); + # 15 = 700 GB peak once but also too much memory another time, so back to 13 (580 GB peak that I observed) + processes = 13 + else: # For 2000, or loss & 2000 + ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. + ### There wouldn't have been enough room for all deadwood and litter otherwise. + ### For example, when deadwood and litter generation started getting up to around 50N, I deleted + ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. 
+ processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = 710 GB peak + else: + processes = 2 + uu.print_log(f'Deadwood and litter max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, + mang_litter_AGB_ratio=mang_litter_AGB_ratio, + carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() + + + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood @@ -343,26 +347,26 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete .extend(glob.glob(f'*{cn.pattern_elevation}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_precip}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_JPL_unmasked_processed}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cont_eco_processed}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() if 'loss' in carbon_pool_extent: - uu.print_log("Creating tiles of soil carbon in loss extent") + uu.print_log('Creating tiles of soil carbon in loss extent') # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. 
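The same dispatch pattern recurs for each pool step above and below (AGC, BGC, deadwood/litter, soil, total C): if cn.SINGLE_PROCESSOR is set, tiles are processed in a plain loop; otherwise functools.partial pins the keyword arguments so that pool.map only iterates over tile_id_list. A minimal, self-contained sketch of that pattern, where process_tile, run, and the example tile ids and processor count are illustrative placeholders rather than functions or settings from this repository:

from functools import partial
import multiprocessing

SINGLE_PROCESSOR = False  # stand-in for cn.SINGLE_PROCESSOR

def process_tile(tile_id, carbon_pool_extent):
    # Stand-in for create_carbon_pools.create_AGC, create_BGC, etc.
    print(f'{tile_id}: {carbon_pool_extent}')

def run(tile_id_list, carbon_pool_extent='loss', processes=2):
    if SINGLE_PROCESSOR:
        for tile_id in tile_id_list:
            process_tile(tile_id, carbon_pool_extent)
    else:
        # partial() fixes carbon_pool_extent; pool.map supplies each tile_id
        with multiprocessing.Pool(processes) as pool:
            pool.map(partial(process_tile, carbon_pool_extent=carbon_pool_extent), tile_id_list)
            pool.close()
            pool.join()

if __name__ == '__main__':
    run(['00N_000E', '00N_010E'])

Keeping the explicit pool.close()/pool.join() inside the with block, as the diff does, is deliberate: Pool's context manager calls terminate() on exit, so closing and joining first ensures the workers have wound down before the pool is torn down.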
@@ -371,30 +375,33 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da else: pattern = output_pattern_list[10] - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': - processes = 36 # 36 processors = XXX GB peak - else: - processes = 44 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak - else: # For 2000, or loss & 2000 - processes = 12 # 12 processors = XXX GB peak + if cn.SINGLE_PROCESSOR: + # For single processor use + for tile_id in tile_id_list: + create_carbon_pools.create_soil_emis_extent(tile_id, pattern) + else: - processes = 2 - uu.print_log('Soil carbon loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 36 # 36 processors = XXX GB peak + else: + processes = 46 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 46 = XXX GB peak + else: # For 2000, or loss & 2000 + processes = 12 # 12 processors = XXX GB peak + else: + processes = 2 + uu.print_log(f'Soil carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern), + tile_id_list) + pool.close() + pool.join() - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. @@ -406,52 +413,51 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da uu.check_storage() if '2000' in carbon_pool_extent: - uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.") + uu.print_log('Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.') uu.check_storage() - # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. - # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine - # for total C 2000 calculation. if '2000' in carbon_pool_extent: # Files to download for total C 2000. 
Previously deleted to save space download_dict = { - cn.BGC_2000_dir: [cn.pattern_BGC_2000], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000] } for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) - uu.print_log("Creating tiles of total carbon") - if cn.count == 96: - # More processors can be used for loss carbon pools than for 2000 carbon pools - if carbon_pool_extent == 'loss': - if sensit_type == 'biomass_swap': - processes = 14 # 14 processors = XXX GB peak - else: - processes = 19 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak - else: # For 2000, or loss & 2000 - processes = 12 # 12 processors = XXX GB peak + uu.print_log('Creating tiles of total carbon') + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + create_carbon_pools.create_total_C(tile_id, carbon_pool_extent) + else: - processes = 2 - uu.print_log('Total carbon loss year max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + # More processors can be used for loss carbon pools than for 2000 carbon pools + if carbon_pool_extent == 'loss': + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 14 # 14 processors = XXX GB peak + else: + processes = 18 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak + else: # For 2000, or loss & 2000 + processes = 12 # 12 processors = XXX GB peak + else: + processes = 2 + uu.print_log(f'Total carbon loss year max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent), + tile_id_list) + pool.close() + pool.join() - # # For single processor use - # for tile_id in tile_id_list: - # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) @@ -468,37 +474,40 @@ def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_da parser = argparse.ArgumentParser( description='Creates tiles of carbon pool densities in the year of loss or in 2000') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--carbon_pool_extent', '-ce', required=True, - help='Extent over which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') + parser.add_argument('--carbon_pool_extent', '-ce', required=True, + help='Extent over which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + cn.SAVE_INTERMEDIATES = args.save_intermediates + cn.CARBON_POOL_EXTENT = args.carbon_pool_extent # Tells the pool creation functions to calculate carbon emitted_pools as they were at the year of loss in loss pixels only + tile_id_list = args.tile_id_list - carbon_pool_extent = args.carbon_pool_extent # Tells the pool creation functions to calculate carbon emitted_pools as they were at the year of loss in loss pixels only - run_date = args.run_date - no_upload = args.no_upload - save_intermediates = args.save_intermediates # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, - carbon_pool_extent=carbon_pool_extent, no_upload=no_upload, save_intermediates=save_intermediates) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_create_carbon_pools(sensit_type=sensit_type, tile_id_list=tile_id_list, - carbon_pool_extent=carbon_pool_extent, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates) + mp_create_carbon_pools(tile_id_list, cn.CARBON_POOL_EXTENT) diff --git a/carbon_pools/mp_create_soil_C.py b/carbon_pools/mp_create_soil_C.py index 30773b52..e26f24e1 100644 --- a/carbon_pools/mp_create_soil_C.py +++ b/carbon_pools/mp_create_soil_C.py @@ -15,7 +15,6 @@ ''' from subprocess import Popen, PIPE, STDOUT, check_call -import create_soil_C from functools import partial import multiprocessing import datetime @@ -23,25 +22,24 @@ import argparse import os import sys -sys.path.append('../') import constants_and_names as cn import universal_util as uu +from . 
import create_soil_C def mp_create_soil_C(tile_id_list, no_upload=None): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) sensit_type = 'std' # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir, - cn.mangrove_biomass_2000_dir, - set3=cn.gain_dir - ) + tile_id_list = uu.create_combined_tile_list( + [cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, cn.gain_dir], + sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # List of output directories and output file name patterns @@ -54,13 +52,13 @@ def mp_create_soil_C(tile_id_list, no_upload=None): ### Soil carbon density uu.print_log("Downloading mangrove soil C rasters") - uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_base_dir, sensit_type) + uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_tile_dir, sensit_type) # For downloading all tiles in the input folders. input_files = [cn.mangrove_biomass_2000_dir] for input in input_files: - uu.s3_folder_download(input, cn.docker_base_dir, sensit_type) + uu.s3_folder_download(input, cn.docker_tile_dir, sensit_type) # Download raw mineral soil C density tiles. # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder @@ -71,7 +69,7 @@ def mp_create_soil_C(tile_id_list, no_upload=None): uu.log_subprocess_output_full(cmd) uu.print_log("Unzipping mangrove soil C rasters...") - cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_base_dir] + cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) # Mangrove soil receives precedence over mineral soil @@ -96,7 +94,7 @@ def mp_create_soil_C(tile_id_list, no_upload=None): # # create_soil_C.create_mangrove_soil_C(tile_id, no_Upload) - uu.print_log('Done making mangrove soil C tiles', '\n') + uu.print_log('Done making mangrove soil C tiles', "\n") uu.print_log("Making mineral soil C vrt...") check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(cn.pattern_mineral_soil_C_raw), shell=True) @@ -112,8 +110,8 @@ def mp_create_soil_C(tile_id_list, no_upload=None): processes = int(cn.count/2) uu.print_log("Creating mineral soil C density tiles with {} processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() @@ -175,8 +173,8 @@ def mp_create_soil_C(tile_id_list, no_upload=None): ### Soil carbon density uncertainty # Separate directories for the 5% CI and 95% CI - dir_CI05 = '{0}{1}'.format(cn.docker_base_dir, 'CI05/') - dir_CI95 = '{0}{1}'.format(cn.docker_base_dir, 'CI95/') + dir_CI05 = '{0}{1}'.format(cn.docker_tile_dir, 'CI05/') + dir_CI95 = '{0}{1}'.format(cn.docker_tile_dir, 'CI95/') vrt_CI05 = 'mineral_soil_C_CI05.vrt' vrt_CI95 = 'mineral_soil_C_CI95.vrt' soil_C_stdev_global = 'soil_C_stdev.tif' @@ -236,8 +234,8 @@ def mp_create_soil_C(tile_id_list, no_upload=None): processes = 2 
uu.print_log("Creating mineral soil C stock stdev tiles with {} processors...".format(processes)) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) pool.close() pool.join() @@ -291,14 +289,14 @@ def mp_create_soil_C(tile_id_list, no_upload=None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date - no_upload = args.no_upload + no_upload = args.NO_UPLOAD # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list, run_date=run_date) + uu.initiate_log(tile_id_list) tile_id_list = uu.tile_id_list_check(tile_id_list) mp_create_soil_C(tile_id_list=tile_id_list, no_upload=no_upload) \ No newline at end of file diff --git a/constants_and_names.py b/constants_and_names.py index 8b1fda7d..984c40e2 100644 --- a/constants_and_names.py +++ b/constants_and_names.py @@ -8,15 +8,44 @@ ######## ######## # Model version -version = '1.2.2' +version = '1.2.3' version_filename = version.replace('.', '_') +# Global variables that can be modified by the command line +global NO_UPLOAD +NO_UPLOAD = False +global SENSIT_TYPE +SENSIT_TYPE = 'std' +global RUN_DATE +RUN_DATE = None +global STAGE_INPUT +STAGE_INPUT = '' +global RUN_THROUGH +RUN_THROUGH = True +global CARBON_POOL_EXTENT +CARBON_POOL_EXTENT = '' +global EMITTED_POOLS +EMITTED_POOLS = '' +global STD_NET_FLUX +STD_NET_FLUX = '' +global INCLUDE_MANGROVES +INCLUDE_MANGROVES = False +global INCLUDE_US +INCLUDE_US = False +global SAVE_INTERMEDIATES +SAVE_INTERMEDIATES = True +global SINGLE_PROCESSOR +SINGLE_PROCESSOR = False +global LOG_NOTE +LOG_NOTE = '' + + # Number of years of tree cover loss. If input loss raster is changed, this must be changed, too. -loss_years = 21 +loss_years = 22 # Number of years in tree cover gain. If input cover gain raster is changed, this must be changed, too. -gain_years = 12 +gain_years = 20 # Biomass to carbon ratio for aboveground, belowground, and deadwood in non-mangrove forests (planted and non-planted) biomass_to_c_non_mangrove = 0.47 @@ -39,7 +68,8 @@ tonnes_to_megatonnes = 1000000 # Belowground to aboveground biomass ratios. Mangrove values are from Table 4.5 of IPCC wetland supplement. -# Non-mangrove value is the average slope of the AGB:BGB relationship in Figure 3 of Mokany et al. 2006. +# Non-mangrove ratio below is the average slope of the AGB:BGB relationship in Figure 3 of Mokany et al. 2006. +# and is only used where Huang et al. 2021 can't reach (remote Pacific islands). 
below_to_above_non_mang = 0.26 below_to_above_trop_wet_mang = 0.49 below_to_above_trop_dry_mang = 0.29 @@ -64,6 +94,11 @@ tile_width = 10 / Hansen_res tile_height = 10 / Hansen_res +# Resolution of aggregated output rasters in decimal degrees +agg_pixel_res = 0.04 + +agg_pixel_res_filename = str(agg_pixel_res).replace('.', '_') + # Pixel window sizes for rewindowed input rasters agg_pixel_window = int(tile_width * 0.004) @@ -82,20 +117,20 @@ s3_base_dir = 's3://gfw2-data/climate/carbon_model/' # Directory for all tiles in the Docker container -docker_base_dir = '/usr/local/tiles/' +docker_tile_dir = '/usr/local/tiles/' docker_tmp = '/usr/local/tmp' docker_app = '/usr/local/app' -c_emis_compile_dst = '{0}/emissions/cpp_util'.format(docker_app) +c_emis_compile_dst = f'{docker_app}/emissions/cpp_util' # Model log start = datetime.datetime.now() date = datetime.datetime.now() date_formatted = date.strftime("%Y_%m_%d__%H_%M_%S") -model_log_dir = os.path.join(s3_base_dir, 'model_logs/v{}/'.format(version)) -model_log = "flux_model_log_{}.txt".format(date_formatted) +model_log_dir = os.path.join(s3_base_dir, f'model_logs/v{version}/') +model_log = f'flux_model_log_{date_formatted}.txt' # Blank created tile list txt @@ -112,7 +147,7 @@ ### Model extent ###### pattern_model_extent = 'model_extent' -model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20220309/') +model_extent_dir = os.path.join(s3_base_dir, 'model_extent/standard/20230315/') ###### ### Biomass tiles @@ -135,8 +170,21 @@ # Processed mangrove aboveground biomass in the year 2000 pattern_mangrove_biomass_2000 = 'mangrove_agb_t_ha_2000' mangrove_biomass_2000_dir = os.path.join(s3_base_dir, 'mangrove_biomass/processed/standard/20190220/') -pattern_mangrove_biomass_2000_rewindow = 'mangrove_agb_t_ha_2000_rewindow' -mangrove_biomass_2000_rewindow_dir = os.path.join(s3_base_dir, 'rewindow/mangrove_biomass/20210621/') + +# Belowground biomass:aboveground biomass ratio tiles +name_raw_AGB_Huang_global = 'pergridarea_agb.nc' +name_raw_BGB_Huang_global = 'pergridarea_bgb.nc' +AGB_BGB_Huang_raw_dir = os.path.join(s3_base_dir, 'BGB_AGB_ratio/raw_AGB_BGB_Huang_et_al_2021/') + +name_rasterized_AGB_Huang_global = 'AGB_global_from_Huang_2021_Mg_ha__20230201.tif' +name_rasterized_BGB_Huang_global = 'BGB_global_from_Huang_2021_Mg_ha__20230201.tif' +name_rasterized_BGB_AGB_Huang_global = 'BGB_AGB_ratio_global_from_Huang_2021__20230201.tif' +name_rasterized_BGB_AGB_Huang_global_extended = 'BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_1400.tif' +AGB_BGB_Huang_rasterized_dir = os.path.join(s3_base_dir, 'BGB_AGB_ratio/rasterized_AGB_BGB_and_ratio_Huang_et_al_2021/') + +pattern_BGB_AGB_ratio = 'BGB_AGB_ratio' +BGB_AGB_ratio_dir = os.path.join(s3_base_dir, 'BGB_AGB_ratio/processed/20230216/') + ###### @@ -146,29 +194,24 @@ # The area of each pixel in m^2 pattern_pixel_area = 'hanson_2013_area' pixel_area_dir = 's3://gfw2-data/analyses/area_28m/' -pattern_pixel_area_rewindow = 'hanson_2013_area_rewindow' -pixel_area_rewindow_dir = os.path.join(s3_base_dir, 'rewindow/pixel_area/20210621/') - # Spreadsheet with annual removals rates -gain_spreadsheet = 'gain_rate_continent_ecozone_age_20200820.xlsx' +gain_spreadsheet = 'gain_rate_continent_ecozone_age_20220914.xlsx' gain_spreadsheet_dir = os.path.join(s3_base_dir, 'removal_rate_tables/') -# Annual Hansen loss tiles (2001-2021) -pattern_loss = 'GFW2021' -loss_dir = 's3://gfw2-data/forest_change/hansen_2021/' +# Annual Hansen loss tiles (2001-2022) +pattern_loss = 'GFW2022' +loss_dir = 
's3://gfw2-data/forest_change/hansen_2022/' -# Hansen removals tiles (2001-2012) -pattern_gain = 'Hansen_GFC2015_gain' -gain_dir = 's3://gfw2-data/forest_change/tree_cover_gain/gaindata_2012/' -pattern_gain_rewindow = 'Hansen_GFC2015_gain_rewindow' -gain_rewindow_dir = os.path.join(s3_base_dir, 'rewindow/tree_cover_gain_2001_2012/20210621/') +# Hansen removals tiles based on canopy height (2000-2020) +# From https://www.frontiersin.org/articles/10.3389/frsen.2022.856903/full +pattern_gain_data_lake = '' +pattern_gain_ec2 = 'tree_cover_gain_2000_2020' +gain_dir = 's3://gfw-data-lake/umd_tree_cover_gain_from_height/v202206/raster/epsg-4326/10/40000/gain/geotiff/' # Tree cover density 2000 tiles pattern_tcd = 'Hansen_GFC2014_treecover2000' tcd_dir = 's3://gfw2-data/forest_cover/2000_treecover/' -pattern_tcd_rewindow = 'Hansen_GFC2014_treecover2000_rewindow' -tcd_rewindow_dir = os.path.join(s3_base_dir, 'rewindow/2000_treecover_density/20210621/') # Intact forest landscape 2000 tiles pattern_ifl = 'res_ifl_2000' @@ -198,15 +241,39 @@ # Peat mask inputs peat_unprocessed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/peatlands/raw/') -cifor_peat_file = 'cifor_peat_mask.tif' -jukka_peat_zip = 'Jukka_peatland.zip' -jukka_peat_shp = 'peatland_drainage_proj.shp' -soilgrids250_peat_url = 'https://files.isric.org/soilgrids/latest/data/wrb/MostProbable/' #Value 14 is histosol according to https://files.isric.org/soilgrids/latest/data/wrb/MostProbable.qml -pattern_soilgrids_most_likely_class = 'geotiff' -# Peat mask +# Gumbricht et al. 2017 (CIFOR) used for 40N to 60S +# https://data.cifor.org/dataset.xhtml?persistentId=doi:10.17528/CIFOR/DATA.00058 +# https://data.cifor.org/file.xhtml?fileId=1727&version=7.0 +Gumbricht_peat_name = 'Gumbricht_2017_CIFOR__TROP_SUBTROP_PeatV21_2016.tif' + +# Creeze et al. 2022 for the Congo basin +# https://congopeat.net/maps/ +# Probability layers of the 5 landcover types (GIS files) as published: https://drive.google.com/file/d/1zsUyFeO9TqRs5oxys3Ld4Ikgk8OYgHgc/ +# Peat is codes 4 and 5 +Crezee_name = 'Crezee_et_al_2022__Congo_Basin__Unsmoothed_Classification_Most_likely_class__compressed_20230315.tif' +Crezee_peat_name = 'Crezee_et_al_2022__Congo_Basin__Unsmoothed_Classification_Most_likely_class__compressed_20230315__peat_only.tif' + +# Hastie et al. 2022 for Peru peat +# https://www.nature.com/articles/s41561-022-00923-4 +Hastie_name = 'Hastie_et_al_2022__Peru__Peatland_Extent_LPA_50m__compressed_20230315.tif' + +# Miettinen et al. 2016 for Indonesia and Malaysia +# https://www.sciencedirect.com/science/article/pii/S2351989415300470 +Miettinen_peat_zip = 'Miettinen_2016__IDN_MYS_peat__aka_peatland_drainage_proj.zip' +Miettinen_peat_shp = 'Miettinen_2016__IDN_MYS_peat__aka_peatland_drainage_proj.shp' +Miettinen_peat_tif = 'Miettinen_2016__IDN_MYS_peat__aka_peatland_drainage_proj.tif' + +# Xu et al. 2018 for >40N (and <60S, though there's no land down there) +# Xu et al. 
2018 for >40N (and <60S, though there's no land down there) +# https://www.sciencedirect.com/science/article/abs/pii/S0341816217303004#ec0005 +Xu_peat_zip = 'Xu_et_al_north_of_40N_reproj__20230302.zip' +Xu_peat_shp = 'Xu_et_al_north_of_40N_reproj__20230302.shp' +Xu_peat_tif = 'Xu_et_al_north_of_40N_reproj__20230302.tif' + +# Combined peat mask tiles pattern_peat_mask = 'peat_mask_processed' -peat_mask_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/peatlands/processed/20200807/') +peat_mask_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/peatlands/processed/20230315/') # Climate zone climate_zone_raw_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/climate_zone/raw/') @@ -222,17 +289,15 @@ # Drivers of tree cover loss drivers_raw_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/raw/') -pattern_drivers_raw = 'Final_Classification_2021__reproj_nearest_0-005_0-005_deg__20220316.tif' +pattern_drivers_raw = 'TCL_DD_2022_20230407_wgs84_setnodata.tif' pattern_drivers = 'tree_cover_loss_driver_processed' -drivers_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/processed/drivers_2021/20220316/') +drivers_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_drivers/processed/drivers_2022/20230407') + +# Tree cover loss from fires +TCLF_raw_dir = 's3://gfw-data-lake/umd_tree_cover_loss_from_fires/v20230315/raw/' +TCLF_processed_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/tree_cover_loss_fires/20230315/processed/') +pattern_TCLF_processed = 'tree_cover_loss_fire_processed' -# Burn year -burn_area_raw_ftp = 'sftp://fuoco.geog.umd.edu/data/MODIS/C6/MCD64A1/HDF/' # per https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf -burn_year_hdf_raw_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/burn_year/raw_hdf/') -burn_year_stacked_hv_tif_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/burn_year/stacked_hv_tifs/') -burn_year_warped_to_Hansen_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/burn_year/burn_year_10x10_clip/') -pattern_burn_year = "burnyear_with_Hansen_loss" -burn_year_dir = os.path.join(s3_base_dir, 'other_emissions_inputs/burn_year/burn_year_with_Hansen_loss/20220308/') ###### ### Plantation processing @@ -274,7 +339,7 @@ # Age categories over entire model extent, as a precursor to assigning IPCC default removal rates pattern_age_cat_IPCC = 'forest_age_category_IPCC__1_young_2_mid_3_old' -age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20220309/') +age_cat_IPCC_dir = os.path.join(s3_base_dir, 'forest_age_category_IPCC/standard/20230315/') ### US-specific removal precursors @@ -333,31 +398,31 @@ # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_AGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20220309/') +annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20230315/') # Annual aboveground biomass removals rate using IPCC default removal rates pattern_annual_gain_BGB_IPCC_defaults = 'annual_removal_factor_BGB_Mg_ha_IPCC_defaults_all_ages' -annual_gain_BGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20220309/') +annual_gain_BGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 
'annual_removal_factor_BGB_IPCC_defaults_all_ages/standard/20230315/') ### Annual composite removal factor # Annual aboveground removals rate for all forest types pattern_annual_gain_AGC_all_types = 'annual_removal_factor_AGC_Mg_ha_all_forest_types' -annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20220309/') +annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_all_forest_types/standard/20230315/') # Annual belowground removals rate for all forest types pattern_annual_gain_BGC_all_types = 'annual_removal_factor_BGC_Mg_ha_all_forest_types' -annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20220309/') +annual_gain_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_BGC_all_forest_types/standard/20230315/') # Annual aboveground+belowground removals rate for all forest types pattern_annual_gain_AGC_BGC_all_types = 'annual_removal_factor_AGC_BGC_Mg_ha_all_forest_types' -annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20220309/') +annual_gain_AGC_BGC_all_types_dir = os.path.join(s3_base_dir, 'annual_removal_factor_AGC_BGC_all_forest_types/standard/20230315/') ### Removal forest types (sources) # Forest type used in removals model pattern_removal_forest_type = 'removal_forest_type' -removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20220309/') +removal_forest_type_dir = os.path.join(s3_base_dir, 'removal_forest_type/standard/20230315/') # Removal model forest type codes mangrove_rank = 6 @@ -372,26 +437,26 @@ # Number of removals years for all forest types pattern_gain_year_count = 'gain_year_count_all_forest_types' -gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20220309/') +gain_year_count_dir = os.path.join(s3_base_dir, 'gain_year_count_all_forest_types/standard/20230315/') ### Cumulative gross carbon dioxide removals # Gross aboveground removals for all forest types -pattern_cumul_gain_AGCO2_all_types = 'gross_removals_AGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20220309/') +pattern_cumul_gain_AGCO2_all_types = f'gross_removals_AGCO2_Mg_ha_all_forest_types_2001_{loss_years}' +cumul_gain_AGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_all_forest_types/standard/per_hectare/20230315/') # Gross belowground removals for all forest types -pattern_cumul_gain_BGCO2_all_types = 'gross_removals_BGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20220309/') +pattern_cumul_gain_BGCO2_all_types = f'gross_removals_BGCO2_Mg_ha_all_forest_types_2001_{loss_years}' +cumul_gain_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_BGCO2_all_forest_types/standard/per_hectare/20230315/') # Gross aboveground and belowground removals for all forest types in all pixels -pattern_cumul_gain_AGCO2_BGCO2_all_types = 'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20220309/') +pattern_cumul_gain_AGCO2_BGCO2_all_types = 
f'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_2001_{loss_years}' +cumul_gain_AGCO2_BGCO2_all_types_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_hectare/20230315/') # Gross aboveground and belowground removals for all forest types in pixels within forest extent -pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent = 'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_forest_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20220309/') +pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent = f'gross_removals_AGCO2_BGCO2_Mg_ha_all_forest_types_forest_extent_2001_{loss_years}' +cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_hectare/20230407/') ###### @@ -403,7 +468,7 @@ # FAO ecozones as boreal/temperate/tropical pattern_fao_ecozone_raw = 'fao_ecozones_bor_tem_tro_20180619.zip' -fao_ecozone_raw_dir = os.path.join(s3_base_dir, 'inputs_for_carbon_pools/raw/{}'.format(pattern_fao_ecozone_raw)) +fao_ecozone_raw_dir = os.path.join(s3_base_dir, f'inputs_for_carbon_pools/raw/{pattern_fao_ecozone_raw}') pattern_bor_tem_trop_intermediate = 'fao_ecozones_bor_tem_tro_intermediate' pattern_bor_tem_trop_processed = 'fao_ecozones_bor_tem_tro_processed' bor_tem_trop_processed_dir = os.path.join(s3_base_dir, 'inputs_for_carbon_pools/processed/fao_ecozones_bor_tem_tro/20190418/') @@ -427,51 +492,51 @@ ## Carbon emitted_pools in loss year # Date to include in the output directory for all emissions year carbon emitted_pools -emis_pool_run_date = '20220309' +emis_pool_run_date = '20230315' # Aboveground carbon in the year of emission for all forest types in loss pixels pattern_AGC_emis_year = "Mg_AGC_ha_emis_year" -AGC_emis_year_dir = os.path.join(base_carbon_pool_dir, 'aboveground_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +AGC_emis_year_dir = os.path.join(base_carbon_pool_dir, f'aboveground_carbon/loss_pixels/standard/{emis_pool_run_date}/') # Belowground carbon in loss pixels pattern_BGC_emis_year = 'Mg_BGC_ha_emis_year' -BGC_emis_year_dir = os.path.join(base_carbon_pool_dir, 'belowground_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +BGC_emis_year_dir = os.path.join(base_carbon_pool_dir, f'belowground_carbon/loss_pixels/standard/{emis_pool_run_date}/') # Deadwood in loss pixels pattern_deadwood_emis_year_2000 = 'Mg_deadwood_C_ha_emis_year_2000' -deadwood_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, 'deadwood_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +deadwood_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, f'deadwood_carbon/loss_pixels/standard/{emis_pool_run_date}/') # Litter in loss pixels pattern_litter_emis_year_2000 = 'Mg_litter_C_ha_emis_year_2000' -litter_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, 'litter_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +litter_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, f'litter_carbon/loss_pixels/standard/{emis_pool_run_date}/') # Soil C in loss pixels pattern_soil_C_emis_year_2000 = 'Mg_soil_C_ha_emis_year_2000' -soil_C_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, 'soil_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +soil_C_emis_year_2000_dir = os.path.join(base_carbon_pool_dir, 
f'soil_carbon/loss_pixels/standard/{emis_pool_run_date}/') # All carbon emitted_pools combined in loss pixels, with emitted values pattern_total_C_emis_year = 'Mg_total_C_ha_emis_year' -total_C_emis_year_dir = os.path.join(base_carbon_pool_dir, 'total_carbon/loss_pixels/standard/{}/'.format(emis_pool_run_date)) +total_C_emis_year_dir = os.path.join(base_carbon_pool_dir, f'total_carbon/loss_pixels/standard/{emis_pool_run_date}/') ## Carbon emitted_pools in 2000 -pool_2000_run_date = '20200826' +pool_2000_run_date = '20230222' # Aboveground carbon for the full biomass 2000 (mangrove and non-mangrove) extent based on 2000 stocks pattern_AGC_2000 = "Mg_AGC_ha_2000" -AGC_2000_dir = os.path.join(base_carbon_pool_dir, 'aboveground_carbon/extent_2000/standard/{}/'.format(emis_pool_run_date)) +AGC_2000_dir = os.path.join(base_carbon_pool_dir, f'aboveground_carbon/extent_2000/standard/{pool_2000_run_date}/') # Belowground carbon for the full biomass 2000 (mangrove and non-mangrove) extent based on 2000 stocks pattern_BGC_2000 = "Mg_BGC_ha_2000" -BGC_2000_dir = os.path.join(base_carbon_pool_dir, 'belowground_carbon/extent_2000/standard/{}/'.format(emis_pool_run_date)) +BGC_2000_dir = os.path.join(base_carbon_pool_dir, f'belowground_carbon/extent_2000/standard/{pool_2000_run_date}/') # Deadwood carbon for the full biomass 2000 (mangrove and non-mangrove) extent based on 2000 stocks pattern_deadwood_2000 = "Mg_deadwood_C_ha_2000" -deadwood_2000_dir = os.path.join(base_carbon_pool_dir, 'deadwood_carbon/extent_2000/standard/{}/'.format(emis_pool_run_date)) +deadwood_2000_dir = os.path.join(base_carbon_pool_dir, f'deadwood_carbon/extent_2000/standard/{pool_2000_run_date}/') # Litter carbon for the full biomass 2000 (mangrove and non-mangrove) extent based on 2000 stocks pattern_litter_2000 = "Mg_litter_C_ha_2000" -litter_2000_dir = os.path.join(base_carbon_pool_dir, 'litter_carbon/extent_2000/standard/{}/'.format(emis_pool_run_date)) +litter_2000_dir = os.path.join(base_carbon_pool_dir, f'litter_carbon/extent_2000/standard/{pool_2000_run_date}/') # Raw mangrove soil C mangrove_soil_C_dir = os.path.join(s3_base_dir, 'carbon_pools/soil_carbon/raw/') @@ -484,7 +549,7 @@ # Soil C full extent but just from SoilGrids250 (mangrove soil C layer not added in) # Not used in model. 
pattern_soil_C_full_extent_2000_non_mang = 'soil_C_ha_full_extent_2000_non_mangrove_Mg_ha' -soil_C_full_extent_2000_non_mang_dir = os.path.join(base_carbon_pool_dir, 'soil_carbon/intermediate_full_extent/no_mangrove/20220414/') +soil_C_full_extent_2000_non_mang_dir = os.path.join(base_carbon_pool_dir, 'soil_carbon/intermediate_full_extent/no_mangrove/20210414/') # Soil C full extent (all soil pixels, with mangrove soil C in Giri mangrove extent getting priority over mineral soil C) # Non-mangrove C is 0-30 cm, mangrove C is 0-100 cm @@ -493,7 +558,7 @@ # Total carbon (all carbon emitted_pools combined) for the full biomass 2000 (mangrove and non-mangrove) extent based on 2000 stocks pattern_total_C_2000 = "Mg_total_C_ha_2000" -total_C_2000_dir = os.path.join(base_carbon_pool_dir, 'total_carbon/extent_2000/standard/{}/'.format(emis_pool_run_date)) +total_C_2000_dir = os.path.join(base_carbon_pool_dir, f'total_carbon/extent_2000/standard/{pool_2000_run_date}/') ###### @@ -503,126 +568,126 @@ ### Emissions from biomass and soil (all carbon emitted_pools) # Date to include in the output directory -emis_run_date_biomass_soil = '20220316' +emis_run_date_biomass_soil = '20230407' -# pattern_gross_emis_commod_biomass_soil = 'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -pattern_gross_emis_commod_biomass_soil = 'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_commod_biomass_soil_dir = '{0}gross_emissions/commodities/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +# pattern_gross_emis_commod_biomass_soil = f'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +pattern_gross_emis_commod_biomass_soil = f'gross_emis_commodity_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_commod_biomass_soil_dir = f'{s3_base_dir}gross_emissions/commodities/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_forestry_biomass_soil = 'gross_emis_forestry_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_forestry_biomass_soil_dir = '{0}gross_emissions/forestry/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_forestry_biomass_soil = f'gross_emis_forestry_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_forestry_biomass_soil_dir = f'{s3_base_dir}gross_emissions/forestry/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_shifting_ag_biomass_soil = 'gross_emis_shifting_ag_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_shifting_ag_biomass_soil_dir = '{0}gross_emissions/shifting_ag/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_shifting_ag_biomass_soil = f'gross_emis_shifting_ag_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_shifting_ag_biomass_soil_dir = f'{s3_base_dir}gross_emissions/shifting_ag/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_urban_biomass_soil = 'gross_emis_urbanization_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_urban_biomass_soil_dir = '{0}gross_emissions/urbanization/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_urban_biomass_soil = f'gross_emis_urbanization_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_urban_biomass_soil_dir = f'{s3_base_dir}gross_emissions/urbanization/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_wildfire_biomass_soil = 
'gross_emis_wildfire_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_wildfire_biomass_soil_dir = '{0}gross_emissions/wildfire/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_wildfire_biomass_soil = f'gross_emis_wildfire_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_wildfire_biomass_soil_dir = f'{s3_base_dir}gross_emissions/wildfire/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_no_driver_biomass_soil = 'gross_emis_no_driver_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_no_driver_biomass_soil_dir = '{0}gross_emissions/no_driver/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_no_driver_biomass_soil = f'gross_emis_no_driver_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_no_driver_biomass_soil_dir = f'{s3_base_dir}gross_emissions/no_driver/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_co2_only_all_drivers_biomass_soil = 'gross_emis_CO2_only_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_co2_only_all_drivers_biomass_soil_dir = '{0}gross_emissions/all_drivers/CO2_only/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_co2_only_all_drivers_biomass_soil = f'gross_emis_CO2_only_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_co2_only_all_drivers_biomass_soil_dir = f'{s3_base_dir}gross_emissions/all_drivers/CO2_only/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_non_co2_all_drivers_biomass_soil = 'gross_emis_non_CO2_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_non_co2_all_drivers_biomass_soil_dir = '{0}gross_emissions/all_drivers/non_CO2/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_non_co2_all_drivers_biomass_soil = f'gross_emis_non_CO2_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_non_co2_all_drivers_biomass_soil_dir = f'{s3_base_dir}gross_emissions/all_drivers/non_CO2/biomass_soil/standard/{emis_run_date_biomass_soil}/' -pattern_gross_emis_all_gases_all_drivers_biomass_soil = 'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_dir = '{0}gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_hectare/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_all_gases_all_drivers_biomass_soil = f'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +gross_emis_all_gases_all_drivers_biomass_soil_dir = f'{s3_base_dir}gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_hectare/{emis_run_date_biomass_soil}/' -pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent = 'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir = '{0}gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_hectare/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent = f'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{loss_years}' +gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir = 
f'{s3_base_dir}gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_hectare/{emis_run_date_biomass_soil}/' -pattern_gross_emis_nodes_biomass_soil = 'gross_emis_decision_tree_nodes_biomass_soil_2001_{}'.format(loss_years) -gross_emis_nodes_biomass_soil_dir = '{0}gross_emissions/decision_tree_nodes/biomass_soil/standard/{1}/'.format(s3_base_dir, emis_run_date_biomass_soil) +pattern_gross_emis_nodes_biomass_soil = f'gross_emis_decision_tree_nodes_biomass_soil_2001_{loss_years}' +gross_emis_nodes_biomass_soil_dir = f'{s3_base_dir}gross_emissions/decision_tree_nodes/biomass_soil/standard/{emis_run_date_biomass_soil}/' ### Emissions from soil only # Date to include in the output directory -emis_run_date_soil_only = '20220318' +emis_run_date_soil_only = '20230407' -pattern_gross_emis_commod_soil_only = 'gross_emis_commodity_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_commod_soil_only_dir = '{0}gross_emissions/commodities/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_commod_soil_only = f'gross_emis_commodity_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_commod_soil_only_dir = f'{s3_base_dir}gross_emissions/commodities/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_forestry_soil_only = 'gross_emis_forestry_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_forestry_soil_only_dir = '{0}gross_emissions/forestry/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_forestry_soil_only = f'gross_emis_forestry_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_forestry_soil_only_dir = f'{s3_base_dir}gross_emissions/forestry/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_shifting_ag_soil_only = 'gross_emis_shifting_ag_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_shifting_ag_soil_only_dir = '{0}gross_emissions/shifting_ag/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_shifting_ag_soil_only = f'gross_emis_shifting_ag_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_shifting_ag_soil_only_dir = f'{s3_base_dir}gross_emissions/shifting_ag/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_urban_soil_only = 'gross_emis_urbanization_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_urban_soil_only_dir = '{0}gross_emissions/urbanization/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_urban_soil_only = f'gross_emis_urbanization_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_urban_soil_only_dir = f'{s3_base_dir}gross_emissions/urbanization/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_wildfire_soil_only = 'gross_emis_wildfire_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_wildfire_soil_only_dir = '{0}gross_emissions/wildfire/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_wildfire_soil_only = f'gross_emis_wildfire_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_wildfire_soil_only_dir = f'{s3_base_dir}gross_emissions/wildfire/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_no_driver_soil_only = 'gross_emis_no_driver_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_no_driver_soil_only_dir = '{0}gross_emissions/no_driver/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_no_driver_soil_only = f'gross_emis_no_driver_Mg_CO2e_ha_soil_only_2001_{loss_years}' 
+gross_emis_no_driver_soil_only_dir = f'{s3_base_dir}gross_emissions/no_driver/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_all_gases_all_drivers_soil_only = 'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_soil_only_dir = '{0}gross_emissions/all_drivers/all_gases/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_all_gases_all_drivers_soil_only = f'gross_emis_all_gases_all_drivers_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_all_gases_all_drivers_soil_only_dir = f'{s3_base_dir}gross_emissions/all_drivers/all_gases/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_co2_only_all_drivers_soil_only = 'gross_emis_CO2_only_all_drivers_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_co2_only_all_drivers_soil_only_dir = '{0}gross_emissions/all_drivers/CO2_only/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_co2_only_all_drivers_soil_only = f'gross_emis_CO2_only_all_drivers_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_co2_only_all_drivers_soil_only_dir = f'{s3_base_dir}gross_emissions/all_drivers/CO2_only/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_non_co2_all_drivers_soil_only = 'gross_emis_non_CO2_all_drivers_Mg_CO2e_ha_soil_only_2001_{}'.format(loss_years) -gross_emis_non_co2_all_drivers_soil_only_dir = '{0}gross_emissions/all_drivers/non_CO2/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_non_co2_all_drivers_soil_only = f'gross_emis_non_CO2_all_drivers_Mg_CO2e_ha_soil_only_2001_{loss_years}' +gross_emis_non_co2_all_drivers_soil_only_dir = f'{s3_base_dir}gross_emissions/all_drivers/non_CO2/soil_only/standard/{emis_run_date_soil_only}/' -pattern_gross_emis_nodes_soil_only = 'gross_emis_decision_tree_nodes_soil_only_2001_{}'.format(loss_years) -gross_emis_nodes_soil_only_dir = '{0}gross_emissions/decision_tree_nodes/soil_only/standard/{1}/'.format(s3_base_dir, emis_run_date_soil_only) +pattern_gross_emis_nodes_soil_only = f'gross_emis_decision_tree_nodes_soil_only_2001_{loss_years}' +gross_emis_nodes_soil_only_dir = f'{s3_base_dir}gross_emissions/decision_tree_nodes/soil_only/standard/{emis_run_date_soil_only}/' ### Net flux ###### # Net emissions for all forest types and all carbon emitted_pools in all pixels -pattern_net_flux = 'net_flux_Mg_CO2e_ha_biomass_soil_2001_{}'.format(loss_years) -net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20220316/') +pattern_net_flux = f'net_flux_Mg_CO2e_ha_biomass_soil_2001_{loss_years}' +net_flux_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_hectare/20230407/') # Net emissions for all forest types and all carbon emitted_pools in forest extent -pattern_net_flux_forest_extent = 'net_flux_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{}'.format(loss_years) -net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20220316/') +pattern_net_flux_forest_extent = f'net_flux_Mg_CO2e_ha_biomass_soil_forest_extent_2001_{loss_years}' +net_flux_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_hectare/20230407/') ### Per pixel model outputs ###### # Gross removals per pixel in all pixels 
-pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent = 'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_full_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20220309/') +pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent = f'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_full_extent_2001_{loss_years}' +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/full_extent/per_pixel/20230407/') # Gross removals per pixel in forest extent -pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent = 'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_forest_extent_2001_{}'.format(loss_years) -cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20220309/') +pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent = f'gross_removals_AGCO2_BGCO2_Mg_pixel_all_forest_types_forest_extent_2001_{loss_years}' +cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_removals_AGCO2_BGCO2_all_forest_types/standard/forest_extent/per_pixel/20230407/') # Gross emissions per pixel in all pixels -pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent = 'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20220316/') +pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent = f'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{loss_years}' +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/full_extent/per_pixel/20230407/') # Gross emissions per pixel in forest extent -pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent = 'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{}'.format(loss_years) -gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20220316/') +pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent = f'gross_emis_all_gases_all_drivers_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{loss_years}' +gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'gross_emissions/all_drivers/all_gases/biomass_soil/standard/forest_extent/per_pixel/20230407/') # Net flux per pixel in all pixels -pattern_net_flux_per_pixel_full_extent = 'net_flux_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{}'.format(loss_years) -net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20220316/') +pattern_net_flux_per_pixel_full_extent = f'net_flux_Mg_CO2e_pixel_biomass_soil_full_extent_2001_{loss_years}' +net_flux_per_pixel_full_extent_dir = os.path.join(s3_base_dir, 
'net_flux_all_forest_types_all_drivers/biomass_soil/standard/full_extent/per_pixel/20230407/') # Net flux per pixel in forest extent -pattern_net_flux_per_pixel_forest_extent = 'net_flux_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{}'.format(loss_years) -net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20220316/') +pattern_net_flux_per_pixel_forest_extent = f'net_flux_Mg_CO2e_pixel_biomass_soil_forest_extent_2001_{loss_years}' +net_flux_per_pixel_forest_extent_dir = os.path.join(s3_base_dir, 'net_flux_all_forest_types_all_drivers/biomass_soil/standard/forest_extent/per_pixel/20230407/') ### 4x4 km aggregation tiles for mapping ###### -pattern_aggreg = '0_04deg_modelv{}'.format(version_filename) -pattern_aggreg_sensit_perc_diff = 'net_flux_0_04deg_modelv{}_perc_diff_std'.format(version_filename) -pattern_aggreg_sensit_sign_change = 'net_flux_0_04deg_modelv{}_sign_change_std'.format(version_filename) +pattern_aggreg = f'0_04deg_modelv{version_filename}' +pattern_aggreg_sensit_perc_diff = f'net_flux_0_04deg_modelv{version_filename}_perc_diff_std' +pattern_aggreg_sensit_sign_change = f'net_flux_0_04deg_modelv{version_filename}_sign_change_std' -output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20220316/') +output_aggreg_dir = os.path.join(s3_base_dir, '0_04deg_output_aggregation/biomass_soil/standard/20230407/') @@ -660,11 +725,11 @@ # Standard deviation for annual aboveground biomass removal factors using IPCC default removal rates pattern_stdev_annual_gain_AGB_IPCC_defaults = 'annual_removal_factor_stdev_AGB_Mg_ha_IPCC_defaults_all_ages' -stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20220309/') +stdev_annual_gain_AGB_IPCC_defaults_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGB_IPCC_defaults_all_ages/standard/20230315/') # Standard deviation for aboveground and belowground removal factors for all forest types pattern_stdev_annual_gain_AGC_all_types = 'annual_removal_factor_stdev_AGC_Mg_ha_all_forest_types' -stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20220309/') +stdev_annual_gain_AGC_all_types_dir = os.path.join(s3_base_dir, 'stdev_annual_removal_factor_AGC_all_forest_types/standard/20230315/') # Raw mineral soil C file site @@ -678,6 +743,13 @@ stdev_soil_C_full_extent_2000_dir = os.path.join(s3_base_dir, 'stdev_soil_carbon_full_extent/standard/20200828/') +### Testing materials +###### + +test_data_dir = '/usr/local/app/test/test_data/' +test_data_out_dir = f'{test_data_dir}tmp_out/' +pattern_test_suffix= 'top_005deg' +pattern_comparison_suffix = f'comparison_{pattern_test_suffix}' ### Sensitivity analysis ###### @@ -686,13 +758,15 @@ 'biomass_swap', 'US_removals', 'no_primary_gain', 'legal_Amazon_loss', 'Mekong_loss'] model_type_arg_help = 'Argument for whether the model is being run in standard form or as a sensitivity analysis run. ' \ - '{0} = Standard model. {1} = Maximize gain years. {2} = Shifting agriculture is treated as commodity-driven deforestation. ' \ + '{0} = Standard model. ' \ + '{1} = Maximize gain years. ' \ + '{2} = Shifting agriculture is treated as commodity-driven deforestation. ' \ '{3} = Commodity-driven deforestation results in grassland rather than cropland.' \ '{4} = Replace Baccini AGB map with Saatchi biomass map. 
' \ '{5} = Use US-specific removals. {6} = Assume primary forests and IFLs have a removal rate of 0.' \ '{7} = Use Brazilian national loss data from PRODES for the legal Amazon.'\ '{8} = Use Hansen v2.0 loss data for the Mekong (first loss year only).'\ - .format(sensitivity_list[0], sensitivity_list[1], sensitivity_list[2], sensitivity_list[3], sensitivity_list[4], + .format(sensitivity_list[0], sensitivity_list[1], sensitivity_list[2], sensitivity_list[3], sensitivity_list[4], sensitivity_list[5], sensitivity_list[6], sensitivity_list[7], sensitivity_list[8]) # ## US-specific removals @@ -746,10 +820,10 @@ Brazil_annual_loss_raw_dir = os.path.join(s3_base_dir, 'sensit_analysis_legal_Amazon_loss/annual_loss/raw/20200920/') -pattern_Brazil_annual_loss_merged = 'legal_Amazon_annual_loss_2001_20{}_merged'.format(loss_years) +pattern_Brazil_annual_loss_merged = f'legal_Amazon_annual_loss_2001_20{loss_years}_merged' Brazil_annual_loss_merged_dir = os.path.join(s3_base_dir, 'sensit_analysis_legal_Amazon_loss/annual_loss/processed/combined/20200920/') -pattern_Brazil_annual_loss_processed = 'legal_Amazon_annual_loss_2001_20{}'.format(loss_years) +pattern_Brazil_annual_loss_processed = f'legal_Amazon_annual_loss_2001_20{loss_years}' Brazil_annual_loss_processed_dir = os.path.join(s3_base_dir, 'sensit_analysis_legal_Amazon_loss/annual_loss/processed/tiles/20200920/') ## Mekong loss (Hansen v2.0) diff --git a/data_import.bat b/data_import.bat index bbf7e558..2abb17f8 100644 --- a/data_import.bat +++ b/data_import.bat @@ -2,11 +2,11 @@ :: Lines must be uncommented according to the model being imported, e.g., standard, maxgain, soil_only, etc. :: David Gibbs, david.gibbs@wri.org -FOR %%I IN (output\carbonflux_20210324_0439\iso\summary\*.csv) DO psql -d flux_model -U postgres -c "\copy standard_iso_summary_20210323 FROM %%I CSV HEADER DELIMITER e'\t' -FOR %%I IN (output\carbonflux_20210324_0439\iso\change\*.csv) DO psql -d flux_model -U postgres -c "\copy standard_iso_change_20210323 FROM %%I CSV HEADER DELIMITER e'\t' +FOR %%I IN (output\carbonflux_20220418_1744\iso\summary\*.csv) DO psql -d flux_model -U postgres -c "\copy standard_iso_summary_20220316 FROM %%I CSV HEADER DELIMITER e'\t' +FOR %%I IN (output\carbonflux_20220418_1744\iso\change\*.csv) DO psql -d flux_model -U postgres -c "\copy standard_iso_change_20220316 FROM %%I CSV HEADER DELIMITER e'\t' -::FOR %%I IN (output\soil_only\iso\summary\*.csv) DO psql -d flux_model -U postgres -c "\copy soil_only_iso_summary_20200904 FROM %%I CSV HEADER DELIMITER e'\t' -::FOR %%I IN (output\soil_only\iso\change\*.csv) DO psql -d flux_model -U postgres -c "\copy soil_only_iso_change_20200904 FROM %%I CSV HEADER DELIMITER e'\t' +::FOR %%I IN (output\carbon_sensitivity_soil_only_20210326_0003\iso\summary\*.csv) DO psql -d flux_model -U postgres -c "\copy soil_only_iso_summary_20210324 FROM %%I CSV HEADER DELIMITER e'\t' +::FOR %%I IN (output\carbon_sensitivity_soil_only_20210326_0003\iso\change\*.csv) DO psql -d flux_model -U postgres -c "\copy soil_only_iso_change_20210324 FROM %%I CSV HEADER DELIMITER e'\t' ::FOR %%I IN (output\maxgain\iso\summary\*.csv) DO psql -d flux_model -U postgres -c "\copy maxgain_iso_summary_20200921 FROM %%I CSV HEADER DELIMITER e'\t' ::FOR %%I IN (output\maxgain\iso\change\*.csv) DO psql -d flux_model -U postgres -c "\copy maxgain_iso_change FROM %%I CSV HEADER DELIMITER e'\t' diff --git a/removals/continent_ecozone_tiles.py b/data_prep/continent_ecozone_tiles.py similarity index 99% rename from 
removals/continent_ecozone_tiles.py rename to data_prep/continent_ecozone_tiles.py index 882498e8..b6796b66 100644 --- a/removals/continent_ecozone_tiles.py +++ b/data_prep/continent_ecozone_tiles.py @@ -19,8 +19,7 @@ import numpy as np import datetime from scipy import stats -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu diff --git a/carbon_pools/create_inputs_for_C_pools.py b/data_prep/create_inputs_for_C_pools.py similarity index 99% rename from carbon_pools/create_inputs_for_C_pools.py rename to data_prep/create_inputs_for_C_pools.py index c99f9eef..8eac48e5 100644 --- a/carbon_pools/create_inputs_for_C_pools.py +++ b/data_prep/create_inputs_for_C_pools.py @@ -7,8 +7,7 @@ import rasterio import numpy as np from scipy import stats -import sys -sys.path.append('../') + import universal_util as uu import constants_and_names as cn diff --git a/data_prep/model_extent.py b/data_prep/model_extent.py index c32709f4..135500ca 100644 --- a/data_prep/model_extent.py +++ b/data_prep/model_extent.py @@ -1,47 +1,50 @@ +""" +Function to create model extent tiles +""" + import datetime import numpy as np import os import rasterio -import logging -import sys -sys.path.append('../') +from memory_profiler import profile + import constants_and_names as cn import universal_util as uu # @uu.counter -def model_extent(tile_id, pattern, sensit_type, no_upload): +# @profile +def model_extent(tile_id, pattern): + """ + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: tile where pixels = 1 are included in the model and pixels = 0 are not included in the model + """ # I don't know why, but this needs to be here and not just in mp_model_extent - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) - uu.print_log("Delineating model extent:", tile_id) + uu.print_log(f'Delineating model extent: {tile_id}') # Start time start = datetime.datetime.now() # Names of the input tiles - mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - pre_2000_plantations = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000) + mangrove = f'{tile_id}_{cn.pattern_mangrove_biomass_2000}.tif' + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' # Tree cover tile name depends on the sensitivity analysis. 
# PRODES extent 2000 stands in for Hansen TCD - if sensit_type == 'legal_Amazon_loss': - tcd = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) - uu.print_log("Using PRODES extent 2000 tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + tcd = f'{tile_id}_{cn.pattern_Brazil_forest_extent_2000_processed}.tif' + uu.print_log(f'Using PRODES extent 2000 tile {tile_id} for {cn.SENSIT_TYPE} sensitivity analysis') else: - tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id) - uu.print_log("Using Hansen tcd tile {0} for {1} model run".format(tile_id, sensit_type)) + tcd = f'{cn.pattern_tcd}_{tile_id}.tif' + uu.print_log(f'Using Hansen tcd tile {tile_id} for {cn.SENSIT_TYPE} model run') # Biomass tile name depends on the sensitivity analysis - if sensit_type == 'biomass_swap': - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) - else: - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile {0} for {1} model run".format(tile_id, sensit_type)) + biomass = uu.sensit_tile_rename_biomass(cn.SENSIT_TYPE, tile_id) - out_tile = '{0}_{1}.tif'.format(tile_id, pattern) + out_tile = uu.make_tile_name(tile_id, pattern) # Opens biomass tile with rasterio.open(tcd) as tcd_src: @@ -63,47 +66,41 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # Checks whether each input tile exists try: mangroves_src = rasterio.open(mangrove) - uu.print_log(" Mangrove tile found for {}".format(tile_id)) - except: - uu.print_log(" No mangrove tile found for {}".format(tile_id)) + uu.print_log(f' Mangrove tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Mangrove tile not found for {tile_id}') try: gain_src = rasterio.open(gain) - uu.print_log(" Gain tile found for {}".format(tile_id)) - except: - uu.print_log(" No gain tile found for {}".format(tile_id)) + uu.print_log(f' Gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gain tile not found for {tile_id}') try: biomass_src = rasterio.open(biomass) - uu.print_log(" Biomass tile found for {}".format(tile_id)) - except: - uu.print_log(" No biomass tile found for {}".format(tile_id)) - - try: - pre_2000_plantations_src = rasterio.open(pre_2000_plantations) - uu.print_log(" Pre-2000 plantation tile found for {}".format(tile_id)) - except: - uu.print_log(" No pre-2000 plantation tile found for {}".format(tile_id)) + uu.print_log(f' Biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Biomass tile not found for {tile_id}') # Opens the output tile, giving it the metadata of the input tiles dst = rasterio.open(out_tile, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) + uu.add_universal_metadata_rasterio(dst) dst.update_tags( units='unitless. 1 = in model extent. 
0 = not in model extent') - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': dst.update_tags( - source='Pixels with ((Hansen 2000 tree cover AND NASA JPL AGB2000) OR Hansen gain OR mangrove biomass 2000) NOT pre-2000 plantations') + source='Pixels with ((Hansen 2000 tree cover AND NASA JPL AGB2000) OR Hansen gain OR mangrove biomass 2000)') else: dst.update_tags( - source='Pixels with ((Hansen 2000 tree cover AND WHRC AGB2000) OR Hansen gain OR mangrove biomass 2000) NOT pre-2000 plantations') + source='Pixels with ((Hansen 2000 tree cover AND WHRC AGB2000) OR Hansen gain OR mangrove biomass 2000)') dst.update_tags( extent='Full model extent. This defines which pixels are included in the model.') - uu.print_log(" Creating model extent for {}".format(tile_id)) + uu.print_log(f' Creating model extent for {tile_id}') uu.check_memory() @@ -115,36 +112,29 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # If the tile does not exist, it creates an array of 0s. try: mangrove_window = mangroves_src.read(1, window=window).astype('uint8') - except: + except UnboundLocalError: mangrove_window = np.zeros((window.height, window.width), dtype=int) try: gain_window = gain_src.read(1, window=window) - except: + except UnboundLocalError: gain_window = np.zeros((window.height, window.width), dtype=int) try: biomass_window = biomass_src.read(1, window=window) - except: + except UnboundLocalError: biomass_window = np.zeros((window.height, window.width), dtype=int) try: tcd_window = tcd_src.read(1, window=window) - except: + except UnboundLocalError: tcd_window = np.zeros((window.height, window.width), dtype=int) - try: - pre_2000_plantations_window = pre_2000_plantations_src.read(1, window=window) - except: - pre_2000_plantations_window = np.zeros((window.height, window.width), dtype=int) # Array of pixels that have both biomass and tree cover density tcd_with_biomass_window = np.where((biomass_window > 0) & (tcd_window > 0), 1, 0) # For all moel types except legal_Amazon_loss sensitivity analysis - if sensit_type != 'legal_Amazon_loss': + if cn.SENSIT_TYPE != 'legal_Amazon_loss': # Array of pixels with (biomass AND tcd) OR mangrove biomass OR Hansen gain - forest_extent = np.where((tcd_with_biomass_window == 1) | (mangrove_window > 1) | (gain_window == 1), 1, 0) - - # extent now WITHOUT pre-2000 plantations - forest_extent = np.where((forest_extent == 1) & (pre_2000_plantations_window == 0), 1, 0).astype('uint8') + forest_extent = np.where((tcd_with_biomass_window == 1) | (mangrove_window > 1) | (gain_window == 1), 1, 0).astype('uint8') # For legal_Amazon_loss sensitivity analysis else: @@ -156,7 +146,5 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): # Writes the output window to the output dst.write_band(1, forest_extent, window=window) - - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/removals/mp_continent_ecozone_tiles.py b/data_prep/mp_continent_ecozone_tiles.py similarity index 90% rename from removals/mp_continent_ecozone_tiles.py rename to data_prep/mp_continent_ecozone_tiles.py index b513deb9..d774a026 100644 --- a/removals/mp_continent_ecozone_tiles.py +++ b/data_prep/mp_continent_ecozone_tiles.py @@ -17,31 +17,32 @@ import multiprocessing -import continent_ecozone_tiles from subprocess import Popen, PIPE, STDOUT, check_call import datetime import argparse import os import sys 
-sys.path.append('../') import constants_and_names as cn import universal_util as uu +from . import continent_ecozone_tiles def mp_continent_ecozone_tiles(tile_id_list, run_date = None): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.create_combined_tile_list(cn.pattern_WHRC_biomass_2000_non_mang_non_planted, cn.mangrove_biomass_2000_dir) + tile_id_list = uu.create_combined_tile_list( + [cn.pattern_WHRC_biomass_2000_non_mang_non_planted, cn.mangrove_biomass_2000_dir], + sensit_type = cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # if the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped - uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std') + uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_tile_dir, 'std') # Unzips ecozone shapefile cmd = ['unzip', cn.cont_eco_zip] @@ -88,6 +89,6 @@ def mp_continent_ecozone_tiles(tile_id_list, run_date = None): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date) + uu.initiate_log(tile_id_list) mp_continent_ecozone_tiles(tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file diff --git a/carbon_pools/mp_create_inputs_for_C_pools.py b/data_prep/mp_create_inputs_for_C_pools.py similarity index 93% rename from carbon_pools/mp_create_inputs_for_C_pools.py rename to data_prep/mp_create_inputs_for_C_pools.py index 72596b67..f8faa853 100644 --- a/carbon_pools/mp_create_inputs_for_C_pools.py +++ b/data_prep/mp_create_inputs_for_C_pools.py @@ -7,16 +7,15 @@ import os import argparse import datetime -import create_inputs_for_C_pools import multiprocessing import sys -sys.path.append('../') import constants_and_names as cn import universal_util as uu +from . 
import create_inputs_for_C_pools def mp_create_inputs_for_C_pools(tile_id_list, run_date = None, no_upload = None): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) sensit_type = 'std' # If a full model run is specified, the correct set of tiles for the particular script is listed @@ -41,10 +40,10 @@ def mp_create_inputs_for_C_pools(tile_id_list, run_date = None, no_upload = None input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir] for input in input_files: - uu.s3_file_download('{}'.format(input), cn.docker_base_dir, sensit_type) + uu.s3_file_download('{}'.format(input), cn.docker_tile_dir, sensit_type) uu.print_log("Unzipping boreal/temperate/tropical file (from FAO ecozones)") - cmd = ['unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d', cn.docker_base_dir] + cmd = ['unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d', cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) uu.print_log("Copying elevation (srtm) files") @@ -86,13 +85,13 @@ def mp_create_inputs_for_C_pools(tile_id_list, run_date = None, no_upload = None args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date - no_upload = args.no_upload + no_upload = args.NO_UPLOAD # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) mp_create_inputs_for_C_pools(tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/data_prep/mp_mangrove_processing.py b/data_prep/mp_mangrove_processing.py index 0b9bc2ba..993c36b0 100644 --- a/data_prep/mp_mangrove_processing.py +++ b/data_prep/mp_mangrove_processing.py @@ -9,13 +9,12 @@ from functools import partial import os from subprocess import Popen, PIPE, STDOUT, check_call -sys.path.append('../') import constants_and_names as cn import universal_util as uu def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': @@ -23,11 +22,11 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads zipped raw mangrove files - uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std') + uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), cn.docker_tile_dir, 'std') # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument) # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that. 
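The hunk below illustrates a pattern used throughout these scripts: keyword arguments that are identical for every tile are bound once with functools.partial, and multiprocessing.Pool.map then iterates only over the tile IDs, which is why dropping no_upload from the partial is the whole change once that flag becomes a global setting. A minimal, self-contained sketch of the pattern, assuming a hypothetical process_tile worker (not the repo's uu.mp_warp_to_Hansen):

```python
import multiprocessing
from functools import partial

def process_tile(tile_id, source_raster, out_pattern):
    """Hypothetical stand-in for a per-tile worker such as uu.mp_warp_to_Hansen."""
    # A real worker would warp/clip source_raster to the 10x10 degree grid cell
    # named by tile_id and write <tile_id>_<out_pattern>.tif.
    print(f'{tile_id}_{out_pattern}.tif would be created from {source_raster}')

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E', '00N_110E']
    # Bind the arguments shared by every tile; the pool maps only over tile IDs.
    worker = partial(process_tile, source_raster='mangrove_agb.vrt', out_pattern='mangrove_agb_t_ha_2000')
    with multiprocessing.Pool(processes=3) as pool:
        pool.map(worker, tile_id_list)
```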
@@ -46,13 +45,13 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): processes=int(cn.count/4) uu.print_log('Mangrove preprocessing max processors=', processes) pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # # For single processor use, for testing purposes # for tile_id in tile_id_list: # - # mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload) + # mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern) # Checks if each tile has data in it. Only tiles with data are uploaded. upload_dir = cn.mangrove_biomass_2000_dir @@ -76,13 +75,13 @@ def mp_mangrove_processing(tile_id_list, run_date = None, no_upload = None): args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date - no_upload = args.no_upload + no_upload = args.NO_UPLOAD # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) mp_mangrove_processing(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/data_prep/mp_model_extent.py b/data_prep/mp_model_extent.py index 67a05680..0956bf65 100644 --- a/data_prep/mp_model_extent.py +++ b/data_prep/mp_model_extent.py @@ -1,62 +1,63 @@ -''' +""" This script creates a binary raster of the model extent at the pixel level. -The model extent is ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations +The model extent is ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0). The rest of the model uses this to mask its extent. For biomass_swap sensitivity analysis, NASA JPL AGB 2000 replaces WHRC 2000. For legal_Amazon_loss sensitivity analysis, PRODES 2000 forest extent replaces Hansen tree cover 2000 and Hansen gain pixels and mangrove pixels outside of (PRODES extent AND WHRC AGB) are not included. -''' +python -m data_prep.mp_model_extent -t std -l 00N_000E -nu +python -m data_prep.mp_model_extent -t std -l all +""" -import multiprocessing -from functools import partial -import pandas as pd -import datetime import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +from functools import partial +import multiprocessing import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'data_prep')) -import model_extent +from . import model_extent -def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_model_extent(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 1 set of tiles where pixels = 1 are included in the model and pixels = 0 are not included in the model + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model. 
Which biomass tiles to use depends on sensitivity analysis - if sensit_type == 'biomass_swap': - tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type) - elif sensit_type == 'legal_Amazon_loss': - tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type) + if cn.SENSIT_TYPE == 'biomass_swap': + tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, cn.SENSIT_TYPE) + elif cn.SENSIT_TYPE == 'legal_Amazon_loss': + tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, cn.SENSIT_TYPE) else: - tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir, - cn.mangrove_biomass_2000_dir, - cn.gain_dir, cn.tcd_dir - ) + tile_id_list = uu.create_combined_tile_list( + [cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, cn.gain_dir, cn.tcd_dir, + cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir], + sensit_type=cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. download_dict = { cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], - cn.gain_dir: [cn.pattern_gain], - cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000] + cn.gain_dir: [cn.pattern_gain_data_lake] } - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed] else: download_dict[cn.tcd_dir] = [cn.pattern_tcd] - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] @@ -68,68 +69,66 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) - + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 38 - else: - processes = 45 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); - # 36 = 550 GB peak; 40 = 590 GB peak; 42 = 631 GB peak; 43 = 690 GB peak; 45 = too high + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + model_extent.model_extent(tile_id, pattern) else: - processes = 3 - uu.print_log('Model extent processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # model_extent.model_extent(tile_id, pattern, sensit_type, no_upload) + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 38 + else: + processes = 45 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); + # 36 = 550 GB peak; 40 = 590 GB peak; 42 = 631 GB peak; 43 = 690 GB peak; 45 = too high + else: + processes = 3 + uu.print_log('Model extent processors=', processes) + with multiprocessing.Pool(processes) as pool: + pool.map(partial(model_extent.model_extent, pattern=pattern), tile_id_list) + pool.close() + pool.join() + # No single-processor versions of these check-if-empty functions output_pattern = output_pattern_list[0] if cn.count <= 2: # For local tests processes = 1 - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {output_pattern} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: processes = 58 # 50 processors = 620 GB peak; 55 = 640 GB; 58 = 650 GB (continues to increase very slowly several hundred tiles in) - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {output_pattern} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), 
tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -140,29 +139,34 @@ def mp_model_extent(sensit_type, tile_id_list, run_date = None, no_upload = None parser = argparse.ArgumentParser( description='Create tiles of the pixels included in the model (model extent)') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_model_extent(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) - + mp_model_extent(tile_id_list=tile_id_list) diff --git a/data_prep/mp_peatland_processing.py b/data_prep/mp_peatland_processing.py new file mode 100644 index 00000000..42da8895 --- /dev/null +++ b/data_prep/mp_peatland_processing.py @@ -0,0 +1,157 @@ +''' +This script makes mask tiles of where peat pixels are. Peat is represented by 1s; non-peat is no-data. +Between 40N and 60S, Gumbricht et al. 2017 (CIFOR) peat is used. +Miettinen et al. 2016 (IDN/MYS), Hastie et al. 2022 (Peru), and Crezee et al. 2022 (Congo basin) supplement it. +Outside that band (>40N, since there are no tiles at >60S), Xu et al. 2018 is used to mask peat. +Between 40N and 60S, Xu et al. 2018 is not used. + +It's important to run a test tile on each peat source. That means running several test tiles. Possible tiles include: +00N_000E: just Gumbricht et al. +00N_010E: Gumbricht et al. and Crezee et al. +00N_110E: Gumbricht et al. and Miettinen et al. +00N_080W: Gumbricht et al. and Hastie et al. +50N_080W: Xu et al. + +python -m data_prep.mp_peatland_processing -l 00N_000E,00N_010E,00N_110E,00N_080W,50N_080W -nu +python -m data_prep.mp_peatland_processing -l all +''' + + +import argparse +from functools import partial +import multiprocessing +import os +import sys + +import constants_and_names as cn +import universal_util as uu +from . 
import peatland_processing + + +def mp_peatland_processing(tile_id_list): + + os.chdir(cn.docker_tile_dir) + + # If a full model run is specified, the correct set of tiles for the particular script is listed + if tile_id_list == 'all': + # List of tiles to run in the model + tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) + + uu.print_log(tile_id_list) + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") + + + # List of output directories and output file name patterns + output_dir_list = [cn.peat_mask_dir] + output_pattern_list = [cn.pattern_peat_mask] + + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + # NOTE: Locally merged in ArcMap all the Xu et al. 2018 peat shapefiles that are above 40N into a single shapefile: + # Xu_et_al_north_of_40N__20230228.shp. Only merged the Xu et al. shapefiles that were north of 40N because + # below that latitude, the model uses Gumbricht (CIFOR) 2017. + + # Downloads peat layers + uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.Gumbricht_peat_name), cn.docker_tile_dir, cn.SENSIT_TYPE) + uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.Miettinen_peat_zip), cn.docker_tile_dir, cn.SENSIT_TYPE) + uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.Xu_peat_zip), cn.docker_tile_dir, cn.SENSIT_TYPE) + uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.Crezee_name), cn.docker_tile_dir, cn.SENSIT_TYPE) + uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.Hastie_name), cn.docker_tile_dir, cn.SENSIT_TYPE) + + # Unzips the Miettinen et al. peat shapefile (IDN and MYS) + cmd = ['unzip', '-o', '-j', cn.Miettinen_peat_zip] + uu.log_subprocess_output_full(cmd) + + # Unzips the Xu et al. peat shapefile (>40 deg N) + cmd = ['unzip', '-o', '-j', cn.Xu_peat_zip] + uu.log_subprocess_output_full(cmd) + + # Converts the Miettinen IDN/MYS peat shapefile to a raster + uu.print_log('Rasterizing Miettinen map...') + cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), + '-tap', '-ot', 'Byte', '-a_nodata', '0', cn.Miettinen_peat_shp, cn.Miettinen_peat_tif] + uu.log_subprocess_output_full(cmd) + uu.print_log(' Miettinen IDN/MYS peat rasterized') + + # Masks the Crezee raster to just the peat classes (codes 4 and 5). 
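+ # Added note: the (A>=4) calc below writes 1 for classes 4 and 5 and leaves everything else as NoData (0),
+ # assuming 5 is the highest class code in the Crezee raster.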
+ uu.print_log('Masking Crezee map to just peat class...') + Crezee_calc = f'--calc=(A>=4)' + Crezee_outfilearg = f'--outfile={cn.Crezee_peat_name}' + cmd = ['gdal_calc.py', '-A', cn.Crezee_name, Crezee_calc, Crezee_outfilearg, + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte'] + uu.log_subprocess_output_full(cmd) + + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + peatland_processing.create_peat_mask_tiles(tile_id) + else: + processes = 70 #30=160 GB peak; 60=280 GB peak; 70=320 GB peak + uu.print_log('Peat map processors=', processes) + with multiprocessing.Pool(processes) as pool: + pool.map(peatland_processing.create_peat_mask_tiles, tile_id_list) + pool.close() + pool.join() + + + # No single-processor versions of these check-if-empty functions + output_pattern = output_pattern_list[0] + if cn.count <= 2: # For local tests + processes = 1 + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {output_pattern} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + else: + processes = 85 # 58 processors = 220 GB peak; 75=230 GB peak; 85=XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {output_pattern} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + + + # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: + uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='Creates tiles of the extent of peatlands') + parser.add_argument('--tile_id_list', '-l', required=True, + help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') + parser.add_argument('--run-date', '-d', required=False, + help='Date of run. 
Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') + args = parser.parse_args() + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = 'std' + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + + tile_id_list = args.tile_id_list + + # Disables upload to s3 if no AWS credentials are found in environment + if not uu.check_aws_creds(): + cn.NO_UPLOAD = True + + # Create the output log + uu.initiate_log(tile_id_list) + + # Checks whether the sensitivity analysis and tile_id_list arguments are valid + uu.check_sensit_type(cn.SENSIT_TYPE) + tile_id_list = uu.tile_id_list_check(tile_id_list) + + mp_peatland_processing(tile_id_list=tile_id_list) \ No newline at end of file diff --git a/data_prep/mp_plantation_preparation.py b/data_prep/mp_plantation_preparation.py index 54d6f47f..06f215dc 100644 --- a/data_prep/mp_plantation_preparation.py +++ b/data_prep/mp_plantation_preparation.py @@ -142,7 +142,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, run_date = None, no_upload = None): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet. @@ -197,7 +197,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...") # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas - uu.s3_file_download(cn.gadm_path, cn.docker_base_dir) + uu.s3_file_download(cn.gadm_path, cn.docker_tile_dir) cmd = ['unzip', cn.gadm_zip] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -230,7 +230,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today)) - cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive'] + cmd = ['aws', 's3', 'cp', cn.docker_tile_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -268,7 +268,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r uu.print_log('{}/'.format(gadm_index_path)) # Copies the shapefile of 1x1 tiles of extent of countries with planted forests - cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)] + cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_tile_dir, '--recursive', '--exclude', 
'*', '--include', '{}*'.format(gadm_index_shp)] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -315,7 +315,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r # Creates a shapefile in which each feature is the extent of a plantation extent tile. # This index shapefile can be used the next time this process is run if starting with Entry Point 3. os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today)) - cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive'] + cmd = ['aws', 's3', 'cp', cn.docker_tile_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -331,7 +331,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...") # Copies the shapefile of 1x1 tiles of extent of planted forests - cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', + cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_tile_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(planted_index_shp), '--recursive'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging @@ -477,7 +477,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r args = parser.parse_args() tile_id_list = args.tile_id_list run_date = args.run_date - no_upload = args.no_upload + no_upload = args.NO_UPLOAD # Creates the directory and shapefile names for the two possible arguments (index shapefiles) gadm_index = os.path.split(args.gadm_tile_index) @@ -494,7 +494,7 @@ def mp_plantation_preparation(gadm_index_shp, planted_index_shp, tile_id_list, r no_upload = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) diff --git a/data_prep/mp_prep_other_inputs_annual.py b/data_prep/mp_prep_other_inputs_annual.py new file mode 100644 index 00000000..99249042 --- /dev/null +++ b/data_prep/mp_prep_other_inputs_annual.py @@ -0,0 +1,202 @@ +''' +This script processes the inputs for the emissions script that haven't been processed by another script. +At this point, that is: climate zone, Indonesia/Malaysia plantations before 2000, tree cover loss drivers (TSC drivers), +combining IFL2000 (extratropics) and primary forests (tropics) into a single layer, +Hansenizing some removal factor standard deviation inputs, Hansenizing the European removal factors, +and Hansenizing three US-specific removal factor inputs. 
+ +python -m data_prep.mp_prep_other_inputs_annual -l 00N_000E -nu +python -m data_prep.mp_prep_other_inputs_annual -l all +''' + +import argparse +import multiprocessing +import datetime +import glob +from functools import partial +import sys +import os + +import constants_and_names as cn +import universal_util as uu + +def mp_prep_other_inputs(tile_id_list): + + os.chdir(cn.docker_tile_dir) + sensit_type='std' + + # If a full model run is specified, the correct set of tiles for the particular script is listed + if tile_id_list == 'all': + # List of tiles to run in the model + tile_id_list = uu.create_combined_tile_list( + [cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, cn.gain_dir, cn.tcd_dir, + cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir] + ) + + uu.print_log(tile_id_list) + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") + + ''' + Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. + gdal_warp is producing a weird output, so I did it in ArcMap for the 2022 update, + with the output cell size being 0.005 x 0.005 degree and the method being nearest. + + arcpy.management.ProjectRaster("TCL_DD_2022_20230407.tif", r"C:\GIS\raw_data\TCL_DD_2022_20230407_wgs84.tif", + 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], + UNIT["Degree",0.0174532925199433]]', "NEAREST", "0.005 0.005", None, None, 'PROJCS["WGS_1984_Goode_Homolosine", + GEOGCS["GCS_unknown",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0], + UNIT["Degree",0.0174532925199433]],PROJECTION["Goode_Homolosine"],PARAMETER["False_Easting",0.0], + PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Option",1.0],UNIT["Meter",1.0]]', "NO_VERTICAL") + + + The 2022 drivers had 0 instead of NoData, so I used Copy Raster to turn the 0 into NoData: + arcpy.management.CopyRaster("TCL_DD_2022_20230407_wgs84.tif", + r"C:\GIS\raw_data\TCL_DD_2022_20230407_wgs84_setnodata.tif", '', None, "0", "NONE", "NONE", '', "NONE", "NONE", "TIFF", "NONE", + "CURRENT_SLICE", "NO_TRANSPOSE") + + ''' + + # List of output directories and output file name patterns + output_dir_list = [ + cn.drivers_processed_dir + # ,cn.TCLF_processed_dir + ] + output_pattern_list = [ + cn.pattern_drivers + # ,cn.pattern_TCLF_processed + ] + + + # If the model run isn't the standard one, the output directory and file names are changed + if sensit_type != 'std': + + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + + + # A date can optionally be provided by the full model script or a run of this script. + # This replaces the date in constants_and_names. + # Only done if output upload is enabled. + if cn.RUN_DATE is not None and cn.NO_UPLOAD is False: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) + + + ### Drivers of tree cover loss processing + uu.print_log("STEP 1: Preprocess drivers of tree cover loss") + + uu.s3_file_download(os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw), cn.docker_tile_dir, sensit_type) + + # Creates tree cover loss driver tiles. + # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers. + # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. 
I also unprojected the global drivers + # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results. + source_raster = cn.pattern_drivers_raw + out_pattern = cn.pattern_drivers + dt = 'Byte' + if cn.count == 96: + processes = 87 # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak + else: + processes = int(cn.count/2) + uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes)) + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) + pool.close() + pool.join() + + + # ### Tree cover loss from fires processing + # uu.print_log("STEP 2: Preprocess tree cover loss from fires") + # + # # TCLF is downloaded to its own folder because it doesn't have a standardized file name pattern. + # # This way, the entire contents of the TCLF folder can be worked on without mixing with other files. + # TCLF_s3_dir = os.path.join(cn.docker_tile_dir, 'TCLF') + # if os.path.exists(TCLF_s3_dir): + # os.rmdir(TCLF_s3_dir) + # os.mkdir(TCLF_s3_dir) + # cmd = ['aws', 's3', 'cp', cn.TCLF_raw_dir, TCLF_s3_dir, '--request-payer', 'requester', + # '--include', '*', '--exclude', 'tiles*', '--exclude', '*geojason', '--exclude', '*Store', '--recursive'] + # uu.log_subprocess_output_full(cmd) + # + # # Creates global vrt of TCLF + # uu.print_log("Creating vrt of TCLF...") + # tclf_vrt = 'TCLF.vrt' + # os.system(f'gdalbuildvrt -srcnodata 0 {tclf_vrt} {TCLF_s3_dir}/*.tif') + # uu.print_log(" TCLF vrt created") + # + # # Creates TCLF tiles + # source_raster = tclf_vrt + # out_pattern = cn.pattern_TCLF_processed + # dt = 'Byte' + # if cn.count == 96: + # processes = 34 # 30 = 510 GB initial peak; 34=600 GB peak + # else: + # processes = int(cn.count/2) + # uu.print_log(f'Creating TCLF tiles with {processes} processors...') + # pool = multiprocessing.Pool(processes) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + # pool.close() + # pool.join() + + + for output_pattern in [ + cn.pattern_drivers + # ,cn.pattern_TCLF_processed + ]: + + if cn.count == 96: + processes = 50 # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + elif cn.count <= 2: # For local tests + processes = 1 + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + else: + processes = int(cn.count / 2) + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() + uu.print_log("\n") + + + # Uploads output tiles to s3 + for i in range(0, len(output_dir_list)): + uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='Create tiles of 
the annual AGB and BGB removals rates for mangrove forests') + parser.add_argument('--tile_id_list', '-l', required=True, + help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') + parser.add_argument('--run-date', '-d', required=False, + help='Date of run. Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') + args = parser.parse_args() + tile_id_list = args.tile_id_list + run_date = args.run_date + cn.NO_UPLOAD = args.no_upload + + # Disables upload to s3 if no AWS credentials are found in environment + if not uu.check_aws_creds(): + cn.NO_UPLOAD = True + + # Create the output log + uu.initiate_log(tile_id_list) + + # Checks whether the tile_id_list argument is valid + tile_id_list = uu.tile_id_list_check(tile_id_list) + + mp_prep_other_inputs(tile_id_list=tile_id_list) \ No newline at end of file diff --git a/data_prep/mp_prep_other_inputs.py b/data_prep/mp_prep_other_inputs_one_off.py similarity index 54% rename from data_prep/mp_prep_other_inputs.py rename to data_prep/mp_prep_other_inputs_one_off.py index de38b6f2..d5506f1f 100644 --- a/data_prep/mp_prep_other_inputs.py +++ b/data_prep/mp_prep_other_inputs_one_off.py @@ -4,83 +4,73 @@ combining IFL2000 (extratropics) and primary forests (tropics) into a single layer, Hansenizing some removal factor standard deviation inputs, Hansenizing the European removal factors, and Hansenizing three US-specific removal factor inputs. + +python -m data_prep.mp_prep_other_inputs_one_off -l 00N_000E -nu +python -m data_prep.mp_prep_other_inputs_one_off -l all ''' -from subprocess import Popen, PIPE, STDOUT, check_call import argparse import multiprocessing import datetime from functools import partial -import sys +import rioxarray as rio import os -import prep_other_inputs +import sys +import xarray as xr -sys.path.append('../') import constants_and_names as cn import universal_util as uu -def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): +from . import prep_other_inputs_one_off - os.chdir(cn.docker_base_dir) +def mp_prep_other_inputs(tile_id_list): + + os.chdir(cn.docker_tile_dir) sensit_type='std' # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - ### BUG: THIS SHOULD ALSO INCLUDE cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir IN ITS LIST - tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir, - cn.mangrove_biomass_2000_dir, - set3=cn.gain_dir - ) + tile_id_list = uu.create_combined_tile_list( + [cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, cn.gain_dir, cn.tcd_dir, + cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir] + ) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") - - ''' - Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. - gdal_warp is producing a weird output, so I did it in ArcMap for the 2020 update, - with the output cell size being 0.01 x 0.01 degree and the method being nearest. 
- - arcpy.ProjectRaster_management(in_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/FinalClassification_2020_v2__from_Jimmy_MacCarthy_20210323.tif", - out_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/Final_Classification_2020__reproj_nearest_0-005_0-005_deg__20210323.tif", - out_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", - resampling_type="NEAREST", cell_size="0.005 0.005", geographic_transform="", - Registration_Point="", - in_coor_system="PROJCS['WGS_1984_Goode_Homolosine',GEOGCS['GCS_unknown',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Goode_Homolosine'],PARAMETER['False_Easting',0.0],PARAMETER['False_Northing',0.0],PARAMETER['Central_Meridian',0.0],PARAMETER['Option',1.0],UNIT['Meter',1.0]]", - vertical="NO_VERTICAL") - ''' + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") + # List of output directories and output file name patterns output_dir_list = [ - # cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir, - cn.drivers_processed_dir - # cn.ifl_primary_processed_dir, - # cn.annual_gain_AGC_natrl_forest_young_dir, - # cn.stdev_annual_gain_AGC_natrl_forest_young_dir, - # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir, - # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir, - # cn.FIA_forest_group_processed_dir, - # cn.age_cat_natrl_forest_US_dir, - # cn.FIA_regions_processed_dir + cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir, + cn.ifl_primary_processed_dir, + cn.annual_gain_AGC_natrl_forest_young_dir, + cn.stdev_annual_gain_AGC_natrl_forest_young_dir, + cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir, + cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir, + cn.FIA_forest_group_processed_dir, + cn.age_cat_natrl_forest_US_dir, + cn.FIA_regions_processed_dir, + cn.BGB_AGB_ratio_dir ] output_pattern_list = [ - # cn.pattern_climate_zone, cn.pattern_plant_pre_2000, - cn.pattern_drivers - # cn.pattern_ifl_primary, - # cn.pattern_annual_gain_AGC_natrl_forest_young, - # cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, - # cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, - # cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe, - # cn.pattern_FIA_forest_group_processed, - # cn.pattern_age_cat_natrl_forest_US, - # cn.pattern_FIA_regions_processed + cn.pattern_climate_zone, cn.pattern_plant_pre_2000, + cn.pattern_ifl_primary, + cn.pattern_annual_gain_AGC_natrl_forest_young, + cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, + cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, + cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe, + cn.pattern_FIA_forest_group_processed, + cn.pattern_age_cat_natrl_forest_US, + cn.pattern_FIA_regions_processed, + cn.pattern_BGB_AGB_ratio ] # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) @@ -88,14 +78,13 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # A date can optionally be provided by the full model 
script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and no_upload is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest # uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type) # uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw), cn.docker_base_dir, sensit_type) # uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type) # uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type) # uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type) @@ -104,9 +93,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster. # # Thus, using wget instead. # cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)] - # process = Popen(cmd, stdout=PIPE, stderr=STDOUT) - # with process.stdout: - # uu.log_subprocess_output(process.stdout) + # uu.log_subprocess_output_full(cmd) # uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type) # cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive'] # uu.log_subprocess_output_full(cmd) @@ -117,26 +104,8 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)] # uu.log_subprocess_output_full(cmd) - # Creates tree cover loss driver tiles. - # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers. - # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. I also unprojected the global drivers - # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results. 
- source_raster = cn.pattern_drivers_raw - out_pattern = cn.pattern_drivers - dt = 'Byte' - if cn.count == 96: - processes = 87 # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak - else: - processes = int(cn.count/2) - uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # Creates young natural forest removal rate tiles + # ### Creates young natural forest removal rate tiles # source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw # out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young # dt = 'float32' @@ -146,11 +115,12 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating young natural forest removals rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # - # # Creates young natural forest removal rate standard deviation tiles + # + # ### Creates young natural forest removal rate standard deviation tiles # source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw # out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young # dt = 'float32' @@ -160,12 +130,12 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # # - # # Creates pre-2000 oil palm plantation tiles + # ### Creates pre-2000 oil palm plantation tiles # if cn.count == 96: # processes = 80 # 45 processors = 100 GB peak; 80 = XXX GB peak # else: @@ -177,7 +147,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # pool.join() # # - # # Creates climate zone tiles + # ### Creates climate zone tiles # if cn.count == 96: # processes = 80 # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak # else: @@ -188,7 +158,8 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # pool.close() # pool.join() # - # # Creates European natural forest removal rate tiles + # + # ### Creates European natural forest removal rate tiles # source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw # out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe # dt = 'float32' @@ -198,11 +169,12 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating European natural forest removals rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # 
pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # - # # Creates European natural forest standard deviation of removal rate tiles + # + # ### Creates European natural forest standard deviation of removal rate tiles # source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw # out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe # dt = 'float32' @@ -212,11 +184,12 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating standard deviation for European natural forest removals rate tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # # + # ### Creates humid tropical primary forest tiles # # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters # uu.print_log("Creating vrt of humid tropial primary forest...") # primary_vrt = 'primary_2001.vrt' @@ -233,12 +206,12 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating primary forest tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # # - # # Creates a combined IFL/primary forest raster + # ### Creates a combined IFL/primary forest raster # # Uses very little memory since it's just file renaming # if cn.count == 96: # processes = 60 # 60 processors = 10 GB peak @@ -251,7 +224,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # pool.join() # # - # # Creates forest age category tiles for US forests + # ### Creates forest age category tiles for US forests # source_raster = cn.name_age_cat_natrl_forest_US_raw # out_pattern = cn.pattern_age_cat_natrl_forest_US # dt = 'Byte' @@ -261,11 +234,11 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest age category tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # - # # Creates forest groups for US forests + # ### Creates forest groups for US forests # source_raster = cn.name_FIA_forest_group_raw # out_pattern = cn.pattern_FIA_forest_group_processed # dt = 'Byte' @@ -275,11 +248,11 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest group tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), 
tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() # - # # Creates FIA regions for US forests + # ### Creates FIA regions for US forests # source_raster = cn.name_FIA_regions_raw # out_pattern = cn.pattern_FIA_regions_processed # dt = 'Byte' @@ -289,13 +262,131 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): # processes = int(cn.count/2) # uu.print_log("Creating US forest region tiles with {} processors...".format(processes)) # pool = multiprocessing.Pool(processes) - # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) # pool.close() # pool.join() + + + ### Creates Hansen tiles of AGB:BGB based on Huang et al. 2021: https://essd.copernicus.org/articles/13/4263/2021/ + + # uu.print_log("Downloading raw NetCDF files...") + # cmd = ['aws', 's3', 'cp', cn.AGB_BGB_Huang_raw_dir, '.'] + # uu.log_subprocess_output_full(cmd) + + # # Converts the AGB and BGB NetCDF files to global geotifs. + # # Note that, for some reason, this isn't working in Docker locally; when it gets to the to_raster step, it keeps + # # saying "Killed", perhaps because it's running out of memory (1.87/1.95 GB used). + # # So I did this in Python shell locally outside Docker and it worked fine. + # # Methods for converting NetCDF4 to geotif are from approach 1 at + # # https://help.marine.copernicus.eu/en/articles/5029956-how-to-convert-netcdf-to-geotiff + # # Compression argument from: https://github.com/corteva/rioxarray/issues/112 + # agb = xr.open_dataset(cn.name_raw_AGB_Huang_global) + # # uu.print_log(agb) + # agb_den = agb['ASHOOT'] + # # uu.print_log(agb_den) + # agb_den = agb_den.rio.set_spatial_dims(x_dim='LON', y_dim='LAT') + # uu.print_log(agb_den) + # agb_den.rio.write_crs("epsg:4326", inplace=True) + # # Produces: + # # ERROR 1: PROJ: proj_create_from_database: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # followed by NetCDF properties. But I think this error isn't a problem; the resulting geotif seems fine. + # agb_den.rio.to_raster(cn.name_rasterized_AGB_Huang_global, compress='DEFLATE') + # # Produces: + # # ERROR 1: PROJ: proj_create_from_name: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # ERROR 1: PROJ: proj_create_from_database: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # But I think this error isn't a problem; the resulting geotif seems fine. + # + # bgb = xr.open_dataset(cn.name_raw_BGB_Huang_global) + # # uu.print_log(bgb) + # bgb_den = bgb['AROOT'] + # # uu.print_log(bgb_den) + # bgb_den = bgb_den.rio.set_spatial_dims(x_dim='LON', y_dim='LAT') + # uu.print_log(bgb_den) + # bgb_den.rio.write_crs("epsg:4326", inplace=True) + # # Produces: + # # ERROR 1: PROJ: proj_create_from_database: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # followed by NetCDF properties. 
But I think this error isn't a problem; the resulting geotif seems fine. + # bgb_den.rio.to_raster(cn.name_rasterized_BGB_Huang_global, compress='DEFLATE') + # # Produces: + # # ERROR 1: PROJ: proj_create_from_name: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # ERROR 1: PROJ: proj_create_from_database: C:\Program Files\GDAL\projlib\proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation. + # # But I think this error isn't a problem; the resulting geotif seems fine. + + # uu.print_log("Generating global BGB:AGB map...") # + # out = f'--outfile={cn.name_rasterized_BGB_AGB_Huang_global}' + # calc = '--calc=A/B' + # datatype = f'--type=Float32' # - for output_pattern in [cn.pattern_drivers - # ,cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young + # # Divides BGB by AGB to get BGB:AGB (root:shoot ratio) + # cmd = ['gdal_calc.py', '-A', cn.name_rasterized_BGB_Huang_global, '-B', cn.name_rasterized_AGB_Huang_global, + # calc, out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', datatype, '--quiet'] + # uu.log_subprocess_output_full(cmd) + + # The resulting global BGB:AGB map has many gaps, as Huang et al. didn't map AGB and BGB on all land. + # Presumably, most of the places without BGB:AGB don't have much forest, but for completeness it seems good to + # fill the BGB:AGB map gaps, both internally and make sure that continental margins aren't left without BGB:AGB. + # I used gdal_fillnodata.py to do this (https://gdal.org/programs/gdal_fillnodata.html). I tried different + # --max_distance parameters, extending it until the interior of the Sahara was covered. Obviously, there's not much + # carbon flux in the interior of the Sahara but I wanted to have full land coverage, which meant using + # --max_distance=1400 (pixels). Times for different --max_distance values are below. + # I didn't experiment with the --smooth_iterations parameter. + # I confirmed that gdal_fillnodata wasn't changing the original BGB:AGB raster and was just filling the gaps. + # The pixels it assigned to the gaps looked plausible. 
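 + # An illustrative spot-check (not part of the workflow) that gap-filling only wrote into former NoData pixels;
 + # the filled file name here assumes the -md 1400 output was the one kept:
 + # import numpy as np
 + # import rasterio
 + # with rasterio.open(cn.name_rasterized_BGB_AGB_Huang_global) as src_orig, \
 + #         rasterio.open('BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_1400.tif') as src_fill:
 + #     orig = src_orig.read(1)
 + #     filled = src_fill.read(1)
 + #     # Pixels that had data originally should be unchanged after gdal_fillnodata
 + #     assert np.array_equal(orig[orig != 0], filled[orig != 0])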
+ + # # time gdal_fillnodata.py BGB_AGB_ratio_global_from_Huang_2021__20230201.tif BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_10.tif -co COMPRESS=DEFLATE -md 10 + # # real 5m7.600s; 6m17.684s + # # user 5m7.600s; 5m38.180s + # # sys 0m5.560s; 0m6.710s + # # + # # time gdal_fillnodata.py BGB_AGB_ratio_global_from_Huang_2021__20230201.tif BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_100.tif -co COMPRESS=DEFLATE -md 100 + # # real 7m44.302s + # # user 7m24.310s + # # sys 0m4.160s + # # + # # time gdal_fillnodata.py BGB_AGB_ratio_global_from_Huang_2021__20230201.tif BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_1000.tif -co COMPRESS=DEFLATE -md 1000 + # # real 51m55.893s + # # user 51m25.800s + # # sys 0m6.510s + # # + # # time gdal_fillnodata.py BGB_AGB_ratio_global_from_Huang_2021__20230201.tif BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_1200.tif -co COMPRESS=DEFLATE -md 1200 + # # real 74m41.544s + # # user 74m5.130s + # # sys 0m7.070s + # # + # # time gdal_fillnodata.py BGB_AGB_ratio_global_from_Huang_2021__20230201.tif BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_1400.tif -co COMPRESS=DEFLATE -md 1400 + # # real + # # user + # # sys + + # cmd = ['gdal_fillnodata.py', + # cn.name_rasterized_BGB_AGB_Huang_global, 'BGB_AGB_ratio_global_from_Huang_2021__20230201_extended_10.tif', + # '-co', 'COMPRESS=DEFLATE', '-md', '10'] + # uu.log_subprocess_output_full(cmd) + + # # upload_final_set isn't uploading the global BGB:AGB map for some reason. + # # It just doesn't show anything in the console and nothing gets uploaded. + # # But I'm not going to try to debug it since it's not an important part of the workflow. + # uu.upload_final_set(cn.AGB_BGB_Huang_rasterized_dir, '_global_from_Huang_2021') + + # Creates BGB:AGB tiles + source_raster = cn.name_rasterized_BGB_AGB_Huang_global_extended + out_pattern = cn.pattern_BGB_AGB_ratio + dt = 'Float32' + if cn.count == 96: + processes = 75 # 15=95 GB peak; 45=280 GB peak; 75=460 GB peak; 85=XXX GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Creating BGB:AGB {processes} processors...') + pool = multiprocessing.Pool(processes) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list) + pool.close() + pool.join() + + + for output_pattern in [ + # cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, + cn.pattern_BGB_AGB_ratio ]: # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData) @@ -311,7 +402,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): pool.join() if cn.count == 96: - processes = 50 # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak + processes = 50 # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = 600 GB peak uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) @@ -331,7 +422,7 @@ def mp_prep_other_inputs(tile_id_list, run_date, no_upload = None): pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() - uu.print_log('\n') + uu.print_log("\n") # Uploads output tiles to s3 @@ -349,19 +440,25 @@ def mp_prep_other_inputs(tile_id_list, run_date, 
no_upload = None): help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() + + # Sets global variables to the command line arguments + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the tile_id_list argument is valid tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_prep_other_inputs(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_prep_other_inputs(tile_id_list=tile_id_list) \ No newline at end of file diff --git a/data_prep/mp_rewindow_tiles.py b/data_prep/mp_rewindow_tiles.py deleted file mode 100644 index 1c82d794..00000000 --- a/data_prep/mp_rewindow_tiles.py +++ /dev/null @@ -1,127 +0,0 @@ -''' -Rewindows tiles from 40000x1 pixels to 160x160 pixels for use in aggregate map creation. -Specifically, does tiles that are not model outputs but are used in aggregate map creation: -tree cover density, pixel area, Hansen gain, and mangrove biomass. -This must be done before the model is run so that the aggregate maps can be created successfully -(aggregate map pixels are the sum of the rewindowed 160x160 pixel windows). -''' - - -import multiprocessing -from subprocess import Popen, PIPE, STDOUT, check_call -from functools import partial -import datetime -import argparse -import os -import glob -import sys -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - - -def mp_rewindow_tiles(tile_id_list, run_date = None, no_upload = None): - - os.chdir(cn.docker_base_dir) - - # Sensitivity analysis model type is not used in this script - sensit_type = 'std' - - # Files to download for this script - download_dict = { - cn.pixel_area_dir: [cn.pattern_pixel_area], - cn.tcd_dir: [cn.pattern_tcd], - cn.gain_dir: [cn.pattern_gain], - cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000] - } - - uu.print_log("Layers to process are:", download_dict) - - # List of output directories. Mut match order of output patterns. - output_dir_list = [cn.pixel_area_rewindow_dir, cn.tcd_rewindow_dir, - cn.gain_rewindow_dir, cn.mangrove_biomass_2000_rewindow_dir] - - # List of output patterns. Must match order of output directories. - output_pattern_list = [cn.pattern_pixel_area_rewindow, cn.pattern_tcd_rewindow, - cn.pattern_gain_rewindow, cn.pattern_mangrove_biomass_2000_rewindow] - - # A date can optionally be provided. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - - # Iterates through the types of tiles to be processed - for dir, download_pattern in list(download_dict.items()): - - download_pattern_name = download_pattern[0] - - # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list - # If a full model run is specified, the correct set of tiles for the particular script is listed - if tile_id_list == 'all': - # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(dir, sensit_type) - - uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, sensit_type, tile_id_list) - - uu.print_log("There are {0} tiles to process for pattern {1}".format(str(len(tile_id_list)), download_pattern_name) + "\n") - uu.print_log("Processing:", dir, "; ", download_pattern_name) - - - # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 160x160 pixels - if cn.count == 96: - # For pixel area: 40 processors = 480 GB peak; 54 = 650 GB peak; 56 = XXX GB peak; 62 = >750 GB peak. - # Much more memory used for pixel area than for other inputs. - processes = 56 - else: - processes = 8 - uu.print_log('Rewindow max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.rewindow, download_pattern_name=download_pattern_name, - no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # - # uu.rewindow(tile_id, download_pattern_name, no_upload) - - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - uu.print_log("Tiles processed. Uploading to s3 now...") - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) - - - -if __name__ == '__main__': - - # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run - parser = argparse.ArgumentParser( - description='Creates 160x160 pixel rewindowed basic input tiles (TCD, gain, mangroves, pixel area)') - parser.add_argument('--tile_id_list', '-l', required=True, - help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--run-date', '-d', required=False, - help='Date of run. Must be format YYYYMMDD.') - parser.add_argument('--no-upload', '-nu', action='store_true', - help='Disables uploading of outputs to s3') - args = parser.parse_args() - tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload - - # Disables upload to s3 if no AWS credentials are found in environment - if not uu.check_aws_creds(): - no_upload = True - - # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) - - # Checks whether the tile_id_list argument is valid - tile_id_list = uu.tile_id_list_check(tile_id_list) - - mp_rewindow_tiles(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file diff --git a/emissions/peatland_processing.py b/data_prep/peatland_processing.py similarity index 52% rename from emissions/peatland_processing.py rename to data_prep/peatland_processing.py index 9e9a6499..49e44887 100644 --- a/emissions/peatland_processing.py +++ b/data_prep/peatland_processing.py @@ -1,21 +1,25 @@ ''' This script makes mask tiles of where peat pixels are. 
Peat is represented by 1s; non-peat is no-data. -Between 40N and 60S, CIFOR peat and Jukka peat (IDN and MYS) are combined to map peat. -Outside that band (>40N, since there are no tiles at >60S), SoilGrids250m is used to mask peat. -Any pixel that is marked as most likely being a histosol subgroup is classified as peat. +Between 40N and 60S, Gumbricht et al. 2017 (CIFOR) peat is used. +Miettinen et al. 2016 (IDN/MYS), Hastie et al. 2022 (Peru), and Crezee et al. 2022 (Congo basin) supplement it. +Outside that band (>40N, since there are no tiles at >60S), Xu et al. 2018 is used to mask peat. +Between 40N and 60S, Xu et al. 2018 is not used. ''' -from subprocess import Popen, PIPE, STDOUT, check_call import os import rasterio from shutil import copyfile import datetime -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu + def create_peat_mask_tiles(tile_id): + """ + :param tile_id: tile to be processed, identified by its tile id + :return: Peat mask: 1 is peat, 0 is no peat + """ # Start time start = datetime.datetime.now() @@ -24,45 +28,38 @@ def create_peat_mask_tiles(tile_id): xmin, ymin, xmax, ymax = uu.coords(tile_id) uu.print_log(" ymax:", ymax, "; ymin:", ymin, "; xmax", xmax, "; xmin:", xmin) - out_tile_no_tag = '{0}_{1}_no_tag.tif'.format(tile_id, cn.pattern_peat_mask) - out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_peat_mask) + out_tile_no_tag = f'{tile_id}_{cn.pattern_peat_mask}_no_tag.tif' + out_tile = f'{tile_id}_{cn.pattern_peat_mask}.tif' - # If the tile is outside the band covered by the CIFOR peat raster, SoilGrids250m is used + # If the tile is outside the band covered by the Gumbricht 2017/CIFOR peat raster, Xu et al. 2018 is used. if ymax > 40 or ymax < -60: - uu.print_log("{} is outside CIFOR band. Using SoilGrids250m organic soil mask...".format(tile_id)) - - out_intermediate = '{0}_intermediate.tif'.format(tile_id, cn.pattern_peat_mask) + uu.print_log(f'{tile_id} is outside Gumbricht band. Using Xu et al. 2018 peat map...') - # Cuts the SoilGrids250m global raster to the focal tile - uu.warp_to_Hansen('most_likely_soil_class.vrt', out_intermediate, xmin, ymin, xmax, ymax, 'Byte') - - # Removes all non-histosol sub-groups from the SoilGrids raster. - # Ideally, this would be done once on the entire SoilGrids raster in the main function but I didn't think of that. - # Code 14 is the histosol subgroup in SoilGrids250 (https://files.isric.org/soilgrids/latest/data/wrb/MostProbable.qml). - calc = '--calc=(A==14)' - peat_mask_out_filearg = '--outfile={}'.format(out_tile_no_tag) - cmd = ['gdal_calc.py', '-A', out_intermediate, calc, peat_mask_out_filearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type=Byte', '--quiet'] + # Converts the Xu >40N peat shapefile to a raster + cmd = ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), + '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-tap', '-ot', 'Byte', '-a_nodata', '0', cn.Xu_peat_shp, out_tile_no_tag] uu.log_subprocess_output_full(cmd) + uu.print_log(f'{tile_id} created.') - uu.print_log("{} created.".format(tile_id)) - - # If the tile is inside the band covered by CIFOR, CIFOR is used (and Jukka in the tiles where it occurs). - # For some reason, the CIFOR raster has a color scheme that makes it symbolized from 0 to 255. This carries + # If the tile is inside the band covered by Gumbricht 2017/CIFOR, Gumbricht is used. 
+ # Miettinen is added in IDN and MYS, Hastie is added in Peri, and Crezee is added in the Congo basin. + # For some reason, the Gumbricht raster has a color scheme that makes it symbolized from 0 to 255. This carries # over to the output file but that seems like a problem with the output symbology, not the values. # gdalinfo shows that the min and max values are 1, as they should be, and it visualizes correctly in ArcMap. else: - uu.print_log("{} is inside CIFOR band. Using CIFOR/Jukka combination...".format(tile_id)) + uu.print_log(f"{tile_id} is inside Gumbricht band. Using Gumbricht/Miettinen/Crezee/Hastie combination...") - # Combines CIFOR and Jukka (if it occurs there) - cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), + # Combines Gumbricht/CIFOR with Miettinen, Hastie, and Crezee (where they occur) + cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), - '-dstnodata', '0', '-overwrite', '{}'.format(cn.cifor_peat_file), 'jukka_peat.tif', out_tile_no_tag] + '-dstnodata', '0', '-overwrite', + cn.Gumbricht_peat_name, cn.Miettinen_peat_tif, cn.Crezee_peat_name, cn.Hastie_name, out_tile_no_tag] uu.log_subprocess_output_full(cmd) + uu.print_log(f'{tile_id} created.') - uu.print_log("{} created.".format(tile_id)) # All of the below is to add metadata tags to the output peat masks. # For some reason, just doing what's at https://rasterio.readthedocs.io/en/latest/topics/tags.html @@ -70,10 +67,6 @@ def create_peat_mask_tiles(tile_id): # I found it necessary to copy the peat mask and read its windows into a new copy of the file, to which the # metadata tags are added. I'm sure there's an easier way to do this but I couldn't figure out how. # I know it's very convoluted but I really couldn't figure out how to add the tags without erasing the data. - # To make it even stranger, adding the tags before the gdal processing seemed to work fine for the non-tropical - # (SoilGrids) tiles but not for the tropical (CIFOR/Jukka) tiles (i.e. data didn't disappear in the non-tropical - # tiles if I added the tags before the GDAL steps but the tropical data did disappear). - copyfile(out_tile_no_tag, out_tile) uu.print_log("Adding metadata tags to", tile_id) @@ -98,11 +91,11 @@ def create_peat_mask_tiles(tile_id): out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(out_tile_tagged, 'std') + uu.add_universal_metadata_rasterio(out_tile_tagged) out_tile_tagged.update_tags( key='1 = peat. 0 = not peat.') out_tile_tagged.update_tags( - source='Jukka for IDN and MYS; CIFOR for rest of tropics; SoilGrids250 (May 2020) most likely histosol for outside tropics') + source='Gumbricht et al. 2017 for <40N; Miettinen et al., Hastie et al. 2022, and Crezee et al. 2022 where they occur; Xu et al. 
2018 for >=40N') out_tile_tagged.update_tags( extent='Full extent of input datasets') @@ -118,8 +111,4 @@ def create_peat_mask_tiles(tile_id): os.remove(out_tile_no_tag) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, cn.pattern_peat_mask) - - - - + uu.end_of_fx_summary(start, tile_id, cn.pattern_peat_mask) \ No newline at end of file diff --git a/data_prep/prep_other_inputs.py b/data_prep/prep_other_inputs_one_off.py similarity index 98% rename from data_prep/prep_other_inputs.py rename to data_prep/prep_other_inputs_one_off.py index 1df0bef0..b4e5da87 100644 --- a/data_prep/prep_other_inputs.py +++ b/data_prep/prep_other_inputs_one_off.py @@ -4,14 +4,12 @@ ''' import datetime -from subprocess import Popen, PIPE, STDOUT, check_call import rasterio import os import numpy as np from scipy import stats import os -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu diff --git a/ec2_launch_template_startup_instructions.TXT b/ec2_launch_template_startup_instructions.TXT index 136f5d18..8afdebc3 100644 --- a/ec2_launch_template_startup_instructions.TXT +++ b/ec2_launch_template_startup_instructions.TXT @@ -75,26 +75,18 @@ ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose docker-compose --version ############################# -# Copy latest flux model repo to the home folder +# Clone latest flux model repo to the home folder +# clone command suggested by Logan Byers. It resolves the problem of not being able to pull the repo after it was cloned, which was conflicting with not being able to SSH into the machine more than ~1 minute after it was created. +# This formulation of git clone makes ec2-user the cloner, rather than root. It's no longer necessary to change ownership (chown) of the repo because carbon-budget will already be owned by ec2-user, not root. ############################# cd /home/ec2-user -git clone https://github.com/wri/carbon-budget -cd carbon-budget - -cd /home/ec2-user/carbon-budget/ +su ec2-user -c "git clone https://github.com/wri/carbon-budget" ####################################### # Starts the docker service ####################################### sudo service docker start -###################################### -# Gives the user (ec2-user) various permissions, such as ability to git pull and enter the docker container. -#Based on https://techoverflow.net/2019/05/07/how-to-fix-git-error-cannot-open-git-fetch_head-permission-denied/ -###################################### -cd / -sudo chown -R ec2-user: . 
- # Replaces htop config file with my preferred configuration mkdir -p /home/ec2-user/.config/htop/ cp /home/ec2-user/carbon-budget/htoprc /home/ec2-user/.config/htop/htoprc \ No newline at end of file diff --git a/emissions/calculate_gross_emissions.py b/emissions/calculate_gross_emissions.py index f4fa95c8..82add9f4 100644 --- a/emissions/calculate_gross_emissions.py +++ b/emissions/calculate_gross_emissions.py @@ -1,17 +1,26 @@ -from subprocess import Popen, PIPE, STDOUT, check_call +""" +Function to call C++ executable that calculates gross emissions +""" + import datetime -import rasterio -from shutil import copyfile -import os -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -# Calls the c++ script to calculate gross emissions -def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload): - - uu.print_log("Calculating gross emissions for", tile_id, "using", sensit_type, "model type...") +def calc_emissions(tile_id, emitted_pools, folder): + """ + Calls the c++ script to calculate gross emissions + :param tile_id: tile to be processed, identified by its tile id + :param emitted_pools: Whether emissions from soil only is calculated, or emissions from biomass and soil. + Options are: soil_only or biomass_soil. + :param folder: + :return: 10 tiles: 6 tiles with emissions for each driver; CO2 emissions from all drivers; + non-CO2 emissions from all drivers; all gases (CO2 and non-CO2 from all drivers); + emissions decision tree nodes (used for QC). + Units: Mg CO2e/ha over entire model period. + """ + + uu.print_log(f'Calculating gross emissions for {tile_id} using {cn.SENSIT_TYPE} model type...') start = datetime.datetime.now() @@ -20,49 +29,35 @@ def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload): # Runs the correct c++ script given the emitted_pools (biomass+soil or soil_only) and model type selected. # soil_only, no_shiftin_ag, and convert_to_grassland have special gross emissions C++ scripts. # The other sensitivity analyses and the standard model all use the same gross emissions C++ script. 
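# For orientation only: uu.log_subprocess_output_full(cmd), used below and throughout this file,
# conceptually runs an external command (here, the compiled C++ executable with the tile id,
# sensitivity type, and folder as arguments) and writes its output to the run log. A stripped-down,
# hypothetical equivalent is sketched here; it is not the repository's actual implementation.
import subprocess

def run_and_log_sketch(cmd):
    """Run an external command, capture its output, and fail loudly on a non-zero exit code."""
    result = subprocess.run(cmd, capture_output=True, text=True)
    print(result.stdout)
    if result.returncode != 0:
        raise RuntimeError(f'Command failed ({result.returncode}): {" ".join(cmd)}\n{result.stderr}')

# e.g. run_and_log_sketch([f'{cn.c_emis_compile_dst}/calc_gross_emissions_generic.exe',
#                          '00N_000E', 'std', '/usr/local/tiles/'])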
- if (emitted_pools == 'soil_only') & (sensit_type == 'std'): - cmd = ['{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder] + if (emitted_pools == 'soil_only') & (cn.SENSIT_TYPE == 'std'): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_soil_only.exe', tile_id, cn.SENSIT_TYPE, folder] - elif (emitted_pools == 'biomass_soil') & (sensit_type in ['convert_to_grassland', 'no_shifting_ag']): - cmd = ['{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type), tile_id, sensit_type, folder] + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE in ['convert_to_grassland', 'no_shifting_ag']): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_{cn.SENSIT_TYPE}.exe', tile_id, cn.SENSIT_TYPE, folder] # This C++ script has an extra argument that names the input carbon emitted_pools and output emissions correctly - elif (emitted_pools == 'biomass_soil') & (sensit_type not in ['no_shifting_ag', 'convert_to_grassland']): - cmd = ['{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder] + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE not in ['no_shifting_ag', 'convert_to_grassland']): + cmd = [f'{cn.c_emis_compile_dst}/calc_gross_emissions_generic.exe', tile_id, cn.SENSIT_TYPE, folder] else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid') + uu.exception_log('Pool and/or sensitivity analysis option not valid') uu.log_subprocess_output_full(cmd) # Identifies which pattern to use for counting tile completion pattern = cn.pattern_gross_emis_commod_biomass_soil - if (emitted_pools == 'biomass_soil') & (sensit_type == 'std'): + if (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE == 'std'): pattern = pattern - elif (emitted_pools == 'biomass_soil') & (sensit_type != 'std'): - pattern = pattern + "_" + sensit_type + elif (emitted_pools == 'biomass_soil') & (cn.SENSIT_TYPE != 'std'): + pattern = pattern + "_" + cn.SENSIT_TYPE elif emitted_pools == 'soil_only': pattern = pattern.replace('biomass_soil', 'soil_only') else: - uu.exception_log(no_upload, 'Pool option not valid') + uu.exception_log('Pool option not valid') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) - - -# Adds metadata tags to the output rasters -def add_metadata_tags(tile_id, pattern, sensit_type): - - # Adds metadata tags to output rasters - uu.add_universal_metadata_tags('{0}_{1}.tif'.format(tile_id, pattern), sensit_type) - - cmd = ['gdal_edit.py', '-mo', - 'units=Mg CO2e/ha over model duration (2001-20{})'.format(cn.loss_years), - '-mo', 'source=many data sources', - '-mo', 'extent=Tree cover loss pixels within model extent (and tree cover loss driver, if applicable)', - '{0}_{1}.tif'.format(tile_id, pattern)] - uu.log_subprocess_output_full(cmd) + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/emissions/cpp_util/calc_gross_emissions_generic.cpp b/emissions/cpp_util/calc_gross_emissions_generic.cpp index 4f60d85e..8c1ae09d 100644 --- a/emissions/cpp_util/calc_gross_emissions_generic.cpp +++ b/emissions/cpp_util/calc_gross_emissions_generic.cpp @@ -11,10 +11,10 @@ // Each end point of the decision tree gets its own code, so that it's easier to tell what branch of the decision tree // each pixel came from. That makes checking the results easier, too. 
// These codes are summarized in carbon-budget/emissions/node_codes.txt -// Because emissions are separately output for CO2 and non-CO2 gases (CH4 and N20), each model endpoint has a CO2-only and +// Because emissions are separately output for CO2 and non-CO2 gases (CH4 and N2O), each model endpoint has a CO2-only and // a non-CO2 value. These are summed to create a total emissions (all gases) for each pixel. // Compile with: -// c++ ../carbon-budget/emissions/cpp_util/calc_gross_emissions_biomass_soil.cpp -o ../carbon-budget/emissions/cpp_util/calc_gross_emissions_biomass_soil.exe -lgdal +// c++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal #include @@ -84,6 +84,12 @@ boreal = constants::boreal; int soil_emis_period; // The number of years over which soil emissions are calculated (separate from model years) soil_emis_period = constants::soil_emis_period; +float shiftag_flu; // F_lu for shifting agriculture (fraction of soil C not emitted over 20 years) +shiftag_flu = constants::shiftag_flu; + +float urb_flu; // F_lu for urbanization (fraction of soil C not emitted over 20 years) +urb_flu = constants::urb_flu; + // Input files // Carbon pools @@ -244,8 +250,8 @@ uly=GeoTransform[3]; pixelsize=GeoTransform[1]; // // Manually change this to test the script on a small part of the raster. This starts at top left of the tile. -//xsize = 4500; -//ysize = 3500; +//xsize = 40000; +//ysize = 1100; // Print the raster size and resolution. Should be 40,000 x 40,000 and pixel size 0.00025. cout << "Gross emissions generic model C++ parameters: " << xsize <<", "<< ysize <<", "<< ulx <<", "<< uly << ", "<< pixelsize << endl; @@ -339,7 +345,7 @@ OUTBAND12 = OUTGDAL12->GetRasterBand(1); OUTBAND12->SetNoDataValue(0); // Decision tree node -OUTGDAL20 = OUTDRIVER->Create( out_name20.c_str(), xsize, ysize, 1, GDT_Float32, papszOptions ); +OUTGDAL20 = OUTDRIVER->Create( out_name20.c_str(), xsize, ysize, 1, GDT_UInt16, papszOptions ); OUTGDAL20->SetGeoTransform(adfGeoTransform); OUTGDAL20->SetProjection(OUTPRJ); OUTBAND20 = OUTGDAL20->GetRasterBand(1); OUTBAND20->SetNoDataValue(0); @@ -371,7 +377,7 @@ float out_data6[xsize]; float out_data10[xsize]; float out_data11[xsize]; float out_data12[xsize]; -float out_data20[xsize]; +short int out_data20[xsize]; // Loop over the y coordinates, then the x coordinates for (y=0; y 0 && agc_data[x] > 0) @@ -655,8 +661,6 @@ for(x=0; x 0) // Shifting ag, peat @@ -955,8 +959,6 @@ for(x=0; x 0) // Urbanization, peat @@ -1263,7 +1265,7 @@ CPLErr errcodeOut6 = OUTBAND6->RasterIO( GF_Write, 0, y, xsize, 1, out_data6, xs CPLErr errcodeOut10 = OUTBAND10->RasterIO( GF_Write, 0, y, xsize, 1, out_data10, xsize, 1, GDT_Float32, 0, 0 ); CPLErr errcodeOut11 = OUTBAND11->RasterIO( GF_Write, 0, y, xsize, 1, out_data11, xsize, 1, GDT_Float32, 0, 0 ); CPLErr errcodeOut12 = OUTBAND12->RasterIO( GF_Write, 0, y, xsize, 1, out_data12, xsize, 1, GDT_Float32, 0, 0 ); -CPLErr errcodeOut20 = OUTBAND20->RasterIO( GF_Write, 0, y, xsize, 1, out_data20, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut20 = OUTBAND20->RasterIO( GF_Write, 0, y, xsize, 1, out_data20, xsize, 1, GDT_UInt16, 0, 0 ); // Number of output files int outSize = 10; diff --git a/emissions/cpp_util/calc_gross_emissions_soil_only.cpp b/emissions/cpp_util/calc_gross_emissions_soil_only.cpp index 52476c72..df0006ef 100644 --- a/emissions/cpp_util/calc_gross_emissions_soil_only.cpp +++ b/emissions/cpp_util/calc_gross_emissions_soil_only.cpp 
@@ -41,7 +41,6 @@ using namespace std; -//to compile: c++ calc_gross_emissions.cpp -o calc_gross_emissions.exe -lgdal int main(int argc, char* argv[]) { // If code is run other than , it will raise this error. @@ -85,6 +84,12 @@ boreal = constants::boreal; int soil_emis_period; // The number of years over which soil emissions are calculated (separate from model years) soil_emis_period = constants::soil_emis_period; +float shiftag_flu; // F_lu for shifting agriculture (fraction of soil C not emitted over 20 years) +shiftag_flu = constants::shiftag_flu; + +float urb_flu; // F_lu for urbanization (fraction of soil C not emitted over 20 years) +urb_flu = constants::urb_flu; + // Input files // Carbon pools use the standard names for this sensitivity analysis @@ -316,7 +321,7 @@ OUTBAND12 = OUTGDAL12->GetRasterBand(1); OUTBAND12->SetNoDataValue(0); // Decision tree node -OUTGDAL20 = OUTDRIVER->Create( out_name20.c_str(), xsize, ysize, 1, GDT_Float32, papszOptions ); +OUTGDAL20 = OUTDRIVER->Create( out_name20.c_str(), xsize, ysize, 1, GDT_UInt16, papszOptions ); OUTGDAL20->SetGeoTransform(adfGeoTransform); OUTGDAL20->SetProjection(OUTPRJ); OUTBAND20 = OUTGDAL20->GetRasterBand(1); OUTBAND20->SetNoDataValue(0); @@ -348,25 +353,50 @@ float out_data6[xsize]; float out_data10[xsize]; float out_data11[xsize]; float out_data12[xsize]; -float out_data20[xsize]; +short int out_data20[xsize]; // Loop over the y coordinates, then the x coordinates for (y=0; yRasterIO(GF_Read, 0, y, xsize, 1, agc_data, xsize, 1, GDT_Float32, 0, 0); -INBAND2->RasterIO(GF_Read, 0, y, xsize, 1, bgc_data, xsize, 1, GDT_Float32, 0, 0); -INBAND3->RasterIO(GF_Read, 0, y, xsize, 1, drivermodel_data, xsize, 1, GDT_Float32, 0, 0); -INBAND4->RasterIO(GF_Read, 0, y, xsize, 1, loss_data, xsize, 1, GDT_Float32, 0, 0); -INBAND5->RasterIO(GF_Read, 0, y, xsize, 1, peat_data, xsize, 1, GDT_Float32, 0, 0); -INBAND6->RasterIO(GF_Read, 0, y, xsize, 1, burn_data, xsize, 1, GDT_Float32, 0, 0); -INBAND7->RasterIO(GF_Read, 0, y, xsize, 1, ifl_primary_data, xsize, 1, GDT_Float32, 0, 0); -INBAND8->RasterIO(GF_Read, 0, y, xsize, 1, ecozone_data, xsize, 1, GDT_Float32, 0, 0); -INBAND9->RasterIO(GF_Read, 0, y, xsize, 1, climate_data, xsize, 1, GDT_Float32, 0, 0); -INBAND10->RasterIO(GF_Read, 0, y, xsize, 1, dead_data, xsize, 1, GDT_Float32, 0, 0); -INBAND11->RasterIO(GF_Read, 0, y, xsize, 1, litter_data, xsize, 1, GDT_Float32, 0, 0); -INBAND12->RasterIO(GF_Read, 0, y, xsize, 1, soil_data, xsize, 1, GDT_Float32, 0, 0); -INBAND13->RasterIO(GF_Read, 0, y, xsize, 1, plant_data, xsize, 1, GDT_Float32, 0, 0); +// The following RasterIO reads (and the RasterIO writes at the end) produced compile warnings about unused results +// (warning: ignoring return value of 'CPLErr GDALRasterBand::RasterIO(GDALRWFlag, int, int, int, int, void*, int, int, GDALDataType, GSpacing, GSpacing, GDALRasterIOExtraArg*)', declared with attribute warn_unused_result [-Wunused-result]). +// I asked how to handle or silence the warnings at https://stackoverflow.com/questions/72410931/how-to-handle-warn-unused-result-wunused-result/72410978#72410978. +// The code below handles the warnings by directing them to arguments, which are then checked. 
+// For cerr instead of std::err: https://www.geeksforgeeks.org/cerr-standard-error-stream-object-in-cpp/ + +// Error code returned by each line saved as their own argument +CPLErr errcodeIn1 = INBAND1->RasterIO(GF_Read, 0, y, xsize, 1, agc_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn2 = INBAND2->RasterIO(GF_Read, 0, y, xsize, 1, bgc_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn3 = INBAND3->RasterIO(GF_Read, 0, y, xsize, 1, drivermodel_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn4 = INBAND4->RasterIO(GF_Read, 0, y, xsize, 1, loss_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn5 = INBAND5->RasterIO(GF_Read, 0, y, xsize, 1, peat_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn6 = INBAND6->RasterIO(GF_Read, 0, y, xsize, 1, burn_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn7 = INBAND7->RasterIO(GF_Read, 0, y, xsize, 1, ifl_primary_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn8 = INBAND8->RasterIO(GF_Read, 0, y, xsize, 1, ecozone_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn9 = INBAND9->RasterIO(GF_Read, 0, y, xsize, 1, climate_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn10 = INBAND10->RasterIO(GF_Read, 0, y, xsize, 1, dead_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn11 = INBAND11->RasterIO(GF_Read, 0, y, xsize, 1, litter_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn12 = INBAND12->RasterIO(GF_Read, 0, y, xsize, 1, soil_data, xsize, 1, GDT_Float32, 0, 0); +CPLErr errcodeIn13 = INBAND13->RasterIO(GF_Read, 0, y, xsize, 1, plant_data, xsize, 1, GDT_Float32, 0, 0); + +// Number of input files +int inSize = 13; + +// Array of error codes returned from each input +CPLErr errcodeInArray [inSize] = {errcodeIn1, errcodeIn2, errcodeIn3, errcodeIn4, errcodeIn5, errcodeIn6, errcodeIn7, +errcodeIn8, errcodeIn9, errcodeIn10, errcodeIn11, errcodeIn12, errcodeIn13}; + +// Iterates through the input error codes to make sure that the error code is acceptable +int j; + +for (j=0; j 0 && agc_data[x] > 0) @@ -607,8 +637,6 @@ for(x=0; x 0) // Shifting ag, peat @@ -651,7 +679,7 @@ for(x=0; x 0) // Urbanization, peat @@ -1169,16 +1195,41 @@ for(x=0; xRasterIO( GF_Write, 0, y, xsize, 1, out_data1, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND2->RasterIO( GF_Write, 0, y, xsize, 1, out_data2, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND3->RasterIO( GF_Write, 0, y, xsize, 1, out_data3, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND4->RasterIO( GF_Write, 0, y, xsize, 1, out_data4, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND5->RasterIO( GF_Write, 0, y, xsize, 1, out_data5, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND6->RasterIO( GF_Write, 0, y, xsize, 1, out_data6, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND10->RasterIO( GF_Write, 0, y, xsize, 1, out_data10, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND11->RasterIO( GF_Write, 0, y, xsize, 1, out_data11, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND12->RasterIO( GF_Write, 0, y, xsize, 1, out_data12, xsize, 1, GDT_Float32, 0, 0 ); -OUTBAND20->RasterIO( GF_Write, 0, y, xsize, 1, out_data20, xsize, 1, GDT_Float32, 0, 0 ); +// The following RasterIO writes (and the RasterIO reads at the start) produced compile warnings about unused results +// (warning: ignoring return value of 'CPLErr GDALRasterBand::RasterIO(GDALRWFlag, int, int, int, int, void*, int, int, GDALDataType, GSpacing, GSpacing, GDALRasterIOExtraArg*)', declared with attribute warn_unused_result [-Wunused-result]). 
+// I asked how to handle or silence the warnings at https://stackoverflow.com/questions/72410931/how-to-handle-warn-unused-result-wunused-result/72410978#72410978. +// The code below handles the warnings by directing them to arguments, which are then checked. +// For cerr instead of std::err: https://www.geeksforgeeks.org/cerr-standard-error-stream-object-in-cpp/ + +// Error code returned by each line saved as their own argument +CPLErr errcodeOut1 = OUTBAND1->RasterIO( GF_Write, 0, y, xsize, 1, out_data1, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut2 = OUTBAND2->RasterIO( GF_Write, 0, y, xsize, 1, out_data2, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut3 = OUTBAND3->RasterIO( GF_Write, 0, y, xsize, 1, out_data3, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut4 = OUTBAND4->RasterIO( GF_Write, 0, y, xsize, 1, out_data4, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut5 = OUTBAND5->RasterIO( GF_Write, 0, y, xsize, 1, out_data5, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut6 = OUTBAND6->RasterIO( GF_Write, 0, y, xsize, 1, out_data6, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut10 = OUTBAND10->RasterIO( GF_Write, 0, y, xsize, 1, out_data10, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut11 = OUTBAND11->RasterIO( GF_Write, 0, y, xsize, 1, out_data11, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut12 = OUTBAND12->RasterIO( GF_Write, 0, y, xsize, 1, out_data12, xsize, 1, GDT_Float32, 0, 0 ); +CPLErr errcodeOut20 = OUTBAND20->RasterIO( GF_Write, 0, y, xsize, 1, out_data20, xsize, 1, GDT_UInt16, 0, 0 ); + +// Number of output files +int outSize = 10; + +// Array of error codes returned from each output +CPLErr errcodeOutArray [outSize] = {errcodeOut1, errcodeOut2, errcodeOut3, errcodeOut4, errcodeOut5, errcodeOut6, +errcodeOut10, errcodeOut11, errcodeOut12, errcodeOut20}; + +// Iterates through the output error codes to make sure that the error code is acceptable +int k; + +for (k=0; k.cpp -o /home/dgibbs/carbon-budget/emissions/cpp_util/calc_gross_emissions_.exe -lgdal -Run by typing python mp_calculate_gross_emissions.py -p [POOL_OPTION] -t [MODEL_TYPE] -l [TILE_LIST] -d [RUN_DATE] -The Python script will call the compiled C++ code as needed. +c++ /usr/local/app/carbon-budget/emissions/cpp_util/calc_gross_emissions_.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_.exe -lgdal The other C++ scripts (equations.cpp and flu_val.cpp) do not need to be compiled separately. + +Run the emissions model with: +python -m emissions.mp_calculate_gross_emissions -t [MODEL_TYPE] -p [POOL_OPTION] -l [TILE_LIST] [optional_arguments] The --pools-to-use argument specifies whether to calculate gross emissions from biomass+soil or just from soil. The --model-type argument specifies whether the model run is a sensitivity analysis or standard run. Emissions from each driver (including loss that had no driver assigned) gets its own tile, as does all emissions combined. -Emissions from all drivers is also output as emissions due to CO2 only and emissions due to other GHG (CH4 and N2O). +Emissions from all drivers is also output as emissions due to CO2 only and emissions due to non-CO2 GHGs (CH4 and N2O). The other output shows which branch of the decision tree that determines the emissions equation applies to each pixel. 
These codes are summarized in carbon-budget/emissions/node_codes.txt -''' -import multiprocessing +python -m emissions.mp_calculate_gross_emissions -t std -l 00N_000E -nu +python -m emissions.mp_calculate_gross_emissions -t std -l all +""" + import argparse -import datetime -import os from functools import partial +import multiprocessing +import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'emissions')) -import calculate_gross_emissions -def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date = None, no_upload = None): +from . import calculate_gross_emissions - os.chdir(cn.docker_base_dir) +def mp_calculate_gross_emissions(tile_id_list, emitted_pools): + """ + :param tile_id_list: list of tile ids to process + :param emitted_pools: Whether emissions from soil only is calculated, or emissions from biomass and soil. + Options are: soil_only or biomass_soil. + :return: 10 sets of tiles: 6 sets of tiles with emissions for each driver; CO2 emissions from all drivers; + non-CO2 emissions from all drivers; all gases (CO2 and non-CO2 from all drivers); + emissions decision tree nodes (used for QC). + Units: Mg CO2e/ha over entire model period. + """ - folder = cn.docker_base_dir + os.chdir(cn.docker_tile_dir) + + folder = cn.docker_tile_dir # If a full model run is specified, the correct set of tiles for the particular script is listed # If the tile_list argument is an s3 folder, the list of tiles in it is created if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script @@ -63,13 +77,13 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d cn.drivers_processed_dir: [cn.pattern_drivers], cn.climate_zone_processed_dir: [cn.pattern_climate_zone], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], - cn.burn_year_dir: [cn.pattern_burn_year] + cn.TCLF_processed_dir: [cn.pattern_TCLF_processed] } # Special loss tiles for the Brazil and Mekong sensitivity analyses - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] @@ -77,7 +91,7 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # Checks the validity of the emitted_pools argument if (emitted_pools not in ['soil_only', 'biomass_soil']): - uu.exception_log(no_upload, 'Invalid pool input. Please choose soil_only or biomass_soil.') + uu.exception_log('Invalid pool input. Please choose soil_only or biomass_soil.') # Checks if the correct c++ script has been compiled for the pool option selected @@ -108,70 +122,69 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # Some sensitivity analyses have specific gross emissions scripts. 
# The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. - if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: - # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)): - if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)): - uu.print_log("C++ for {} already compiled.".format(sensit_type)) - else: - uu.exception_log(no_upload, 'Must compile {} model C++...'.format(sensit_type)) + if cn.SENSIT_TYPE in ['no_shifting_ag', 'convert_to_grassland']: + uu.print_log(f'Compiling {cn.SENSIT_TYPE} model C++...') + cmd = ['c++', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_{cn.SENSIT_TYPE}.cpp', + '-o', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_{cn.SENSIT_TYPE}.exe', '-lgdal'] + uu.log_subprocess_output_full(cmd) else: - if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") - else: - uu.exception_log(no_upload, 'Must compile generic emissions C++...') - - elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): - if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for soil_only already compiled.") - - # Output file directories for soil_only. Must be in same order as output pattern directories. - output_dir_list = [cn.gross_emis_commod_soil_only_dir, - cn.gross_emis_shifting_ag_soil_only_dir, - cn.gross_emis_forestry_soil_only_dir, - cn.gross_emis_wildfire_soil_only_dir, - cn.gross_emis_urban_soil_only_dir, - cn.gross_emis_no_driver_soil_only_dir, - cn.gross_emis_all_gases_all_drivers_soil_only_dir, - cn.gross_emis_co2_only_all_drivers_soil_only_dir, - cn.gross_emis_non_co2_all_drivers_soil_only_dir, - cn.gross_emis_nodes_soil_only_dir] - - output_pattern_list = [cn.pattern_gross_emis_commod_soil_only, - cn.pattern_gross_emis_shifting_ag_soil_only, - cn.pattern_gross_emis_forestry_soil_only, - cn.pattern_gross_emis_wildfire_soil_only, - cn.pattern_gross_emis_urban_soil_only, - cn.pattern_gross_emis_no_driver_soil_only, - cn.pattern_gross_emis_all_gases_all_drivers_soil_only, - cn.pattern_gross_emis_co2_only_all_drivers_soil_only, - cn.pattern_gross_emis_non_co2_all_drivers_soil_only, - cn.pattern_gross_emis_nodes_soil_only] - - else: - uu.exception_log(no_upload, 'Must compile soil_only C++...') + uu.print_log(f'Compiling generic model C++...') + cmd = ['c++', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp', + '-o', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe', '-lgdal'] + uu.log_subprocess_output_full(cmd) + + elif (emitted_pools == 'soil_only') & (cn.SENSIT_TYPE == 'std'): + + # Output file directories for soil_only. Must be in same order as output pattern directories. 
+ output_dir_list = [cn.gross_emis_commod_soil_only_dir, + cn.gross_emis_shifting_ag_soil_only_dir, + cn.gross_emis_forestry_soil_only_dir, + cn.gross_emis_wildfire_soil_only_dir, + cn.gross_emis_urban_soil_only_dir, + cn.gross_emis_no_driver_soil_only_dir, + cn.gross_emis_all_gases_all_drivers_soil_only_dir, + cn.gross_emis_co2_only_all_drivers_soil_only_dir, + cn.gross_emis_non_co2_all_drivers_soil_only_dir, + cn.gross_emis_nodes_soil_only_dir] + + output_pattern_list = [cn.pattern_gross_emis_commod_soil_only, + cn.pattern_gross_emis_shifting_ag_soil_only, + cn.pattern_gross_emis_forestry_soil_only, + cn.pattern_gross_emis_wildfire_soil_only, + cn.pattern_gross_emis_urban_soil_only, + cn.pattern_gross_emis_no_driver_soil_only, + cn.pattern_gross_emis_all_gases_all_drivers_soil_only, + cn.pattern_gross_emis_co2_only_all_drivers_soil_only, + cn.pattern_gross_emis_non_co2_all_drivers_soil_only, + cn.pattern_gross_emis_nodes_soil_only] + + uu.print_log(f'Compiling soil_only model C++...') + cmd = ['c++', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.cpp', + '-o', f'/usr/local/app/emissions/cpp_util/calc_gross_emissions_soil_only.exe', '-lgdal'] + uu.log_subprocess_output_full(cmd) else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid') + uu.exception_log('Pool and/or sensitivity analysis option not valid') # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key - pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + directory = key + output_pattern = values[0] + uu.s3_flexible_download(directory, output_pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) uu.print_log(output_pattern_list) @@ -181,10 +194,10 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles. # That way, the C++ script gets all the necessary input files. # If it doesn't get the necessary inputs, it skips that tile. 
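# As a hedged illustration only: the sketch below shows roughly what creating one such placeholder
# ("blank") tile could look like. It is not the repository's uu.make_blank_tile implementation;
# the function name, the 400x400 raster size (real tiles are 40,000 x 40,000 pixels), and the
# example pattern string are placeholders chosen for this sketch.
import os
import numpy as np
import rasterio
from rasterio.transform import from_origin

def make_blank_tile_sketch(tile_id, pattern, folder, xmin, ymax, size=400):
    """Write an all-NoData tile for a missing input so the downstream step finds every expected file."""
    out_path = os.path.join(folder, f'{tile_id}_{pattern}.tif')
    if os.path.exists(out_path):   # a real input tile is already present; nothing to do
        return out_path
    res = 10.0 / size              # each tile covers a 10x10 degree footprint
    with rasterio.open(out_path, 'w', driver='GTiff', height=size, width=size, count=1,
                       dtype='float32', crs='EPSG:4326', nodata=0, compress='DEFLATE',
                       transform=from_origin(xmin, ymax, res, res)) as dst:
        dst.write(np.zeros((size, size), dtype='float32'), 1)
    return out_path

# Example: a placeholder tile for 00N_000E (top-left corner at 0N, 0E)
# make_blank_tile_sketch('00N_000E', 'peat_mask_example', cn.docker_tile_dir, 0, 0)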
- uu.print_log("Making blank tiles for inputs that don't currently exist") + uu.print_log('Making blank tiles for inputs that do not currently exist') # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools pattern_list = [cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask, cn.pattern_ifl_primary, - cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year, cn.pattern_climate_zone, + cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_TCLF_processed, cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000] @@ -192,70 +205,75 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() - for pattern in pattern_list: - pool = multiprocessing.Pool(processes=80) # 60 = 100 GB peak; 80 = XXX GB peak - pool.map(partial(uu.make_blank_tile, pattern=pattern, folder=folder, - sensit_type=sensit_type), tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for pattern in pattern_list: + for tile in tile_id_list: + uu.make_blank_tile(tile, pattern, folder) - # # For single processor use - # for pattern in pattern_list: - # for tile in tile_id_list: - # uu.make_blank_tile(tile, pattern, folder, sensit_type) + else: + processes=80 # 60 = 100 GB peak; 80 = XXX GB peak + for output_pattern in pattern_list: + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.make_blank_tile, pattern=output_pattern, folder=folder), + tile_id_list) + pool.close() + pool.join() # Calculates gross emissions for each tile - # count/4 uses about 390 GB on a r4.16xlarge spot machine. - # processes=18 uses about 440 GB on an r4.16xlarge spot machine. - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 15 # 15 processors = XXX GB peak - else: - processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak - else: - processes = 9 - uu.print_log('Gross emissions max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, sensit_type=sensit_type, - folder=folder, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile in tile_id_list: + calculate_gross_emissions.calc_emissions(tile, emitted_pools, folder) - # # For single processor use - # for tile in tile_id_list: - # calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder, no_upload) + else: + # count/4 uses about 390 GB on a r4.16xlarge spot machine. + # processes=18 uses about 440 GB on an r4.16xlarge spot machine. 
+ if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 15 # 15 processors = XXX GB peak + else: + processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 720 GB peak + else: + processes = 9 + uu.print_log(f'Gross emissions max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, + folder=folder), + tile_id_list) + pool.close() + pool.join() # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() + for i, output_pattern in enumerate(output_pattern_list): - for i in range(0, len(output_pattern_list)): - pattern = output_pattern_list[i] + uu.print_log(f'Adding metadata tags for pattern {output_pattern}') - uu.print_log("Adding metadata tags for pattern {}".format(pattern)) + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + uu.add_emissions_metadata(tile_id, output_pattern) - if cn.count == 96: - processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak else: - processes = 9 - uu.print_log('Adding metadata tags max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(calculate_gross_emissions.add_metadata_tags, pattern=pattern, sensit_type=sensit_type), - tile_id_list) - pool.close() - pool.join() + if cn.count == 96: + processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak + else: + processes = 9 + uu.print_log(f'Adding metadata tags max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.add_emissions_metadata, output_pattern=output_pattern), + tile_id_list) + pool.close() + pool.join() - # for tile_id in tile_id_list: - # calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type) - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -263,38 +281,42 @@ def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_d # Two arguments for the script: whether only emissions from biomass (soil_only) is being calculated or emissions from biomass and soil (biomass_soil), # and which model type is being run (standard or sensitivity analysis) parser = argparse.ArgumentParser(description='Calculates gross emissions') - parser.add_argument('--emitted-pools-to-use', '-p', required=True, - help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') + parser.add_argument('--emitted-pools-to-use', '-p', required=True, + help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + cn.EMITTED_POOLS = args.emitted_pools_to_use + tile_id_list = args.tile_id_list - emitted_pools = args.emitted_pools_to_use - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, - emitted_pools=emitted_pools, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') else: tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_calculate_gross_emissions(sensit_type=sensit_type, tile_id_list=tile_id_list, emitted_pools=emitted_pools, - run_date=run_date, no_upload=no_upload) + mp_calculate_gross_emissions(tile_id_list, cn.EMITTED_POOLS) diff --git a/emissions/mp_peatland_processing.py b/emissions/mp_peatland_processing.py deleted file mode 100644 index 84bcda9d..00000000 --- a/emissions/mp_peatland_processing.py +++ /dev/null @@ -1,120 +0,0 @@ -''' -This script makes mask tiles of where peat pixels are. Peat is represented by 1s; non-peat is no-data. -Between 40N and 60S, CIFOR peat and Jukka peat (IDN and MYS) are combined to map peat. -Outside that band (>40N, since there are no tiles at >60S), SoilGrids250m is used to mask peat. -Any pixel that is marked as most likely being a histosol subgroup is classified as peat. -Between 40N and 60S, SoilGrids250m is not used. -''' - - -import multiprocessing -import peatland_processing -import argparse -from functools import partial -import datetime -import sys -import os -from subprocess import Popen, PIPE, STDOUT, check_call -sys.path.append('../') -import constants_and_names as cn -import universal_util as uu - -def mp_peatland_processing(tile_id_list, run_date = None): - - os.chdir(cn.docker_base_dir) - - # If a full model run is specified, the correct set of tiles for the particular script is listed - if tile_id_list == 'all': - # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.pixel_area_dir) - - uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") - - - # List of output directories and output file name patterns - output_dir_list = [cn.peat_mask_dir] - output_pattern_list = [cn.pattern_peat_mask] - - - # A date can optionally be provided by the full model script or a run of this script. - # This replaces the date in constants_and_names. - # Only done if output upload is enabled. 
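# A minimal, hypothetical sketch of the pattern used in the __main__ block above, in which
# command-line arguments are stored once as attributes of the shared constants_and_names module
# (cn.SENSIT_TYPE, cn.RUN_DATE, cn.NO_UPLOAD, cn.SINGLE_PROCESSOR) so that worker functions can
# read them without having every value passed explicitly. Function and argument names below are
# illustrative, not from the repository.
import argparse
import constants_and_names as cn

def report_tile(tile_id):
    # Reads the run-wide setting from the shared module instead of receiving it as a parameter
    print(f'{tile_id}: model type = {cn.SENSIT_TYPE}')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-type', '-t', required=True)
    args = parser.parse_args()
    cn.SENSIT_TYPE = args.model_type   # set once here, read anywhere cn is imported
    report_tile('00N_000E')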
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) - - - # Download SoilGrids250 most probable soil class rasters. - # There are 459 tiles and it takes about 20 minutes to download them - cmd = ['wget', '--recursive', '--no-parent', '-nH', '--cut-dirs=7', - '--accept', '*.geotiff', '{}'.format(cn.soilgrids250_peat_url)] - uu.log_subprocess_output_full(cmd) - - uu.print_log("Making SoilGrids250 most likely soil class vrt...") - check_call('gdalbuildvrt most_likely_soil_class.vrt *{}*'.format(cn.pattern_soilgrids_most_likely_class), shell=True) - uu.print_log("Done making SoilGrids250 most likely soil class vrt") - - # Downloads peat layers - uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.cifor_peat_file), cn.docker_base_dir, sensit_type) - uu.s3_file_download(os.path.join(cn.peat_unprocessed_dir, cn.jukka_peat_zip), cn.docker_base_dir, sensit_type) - - # Unzips the Jukka peat shapefile (IDN and MYS) - cmd = ['unzip', '-o', '-j', cn.jukka_peat_zip] - uu.log_subprocess_output_full(cmd) - - jukka_tif = 'jukka_peat.tif' - - # Converts the Jukka peat shapefile to a raster - uu.print_log('Rasterizing jukka peat...') - cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), - '-tap', '-ot', 'Byte', '-a_nodata', '0', cn.jukka_peat_shp, jukka_tif] - uu.log_subprocess_output_full(cmd) - uu.print_log(' Jukka peat rasterized') - - # For multiprocessor use - # count-10 maxes out at about 100 GB on an r5d.16xlarge - processes=cn.count-5 - uu.print_log('Peatland preprocessing max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(peatland_processing.create_peat_mask_tiles, tile_id_list) - pool.close() - pool.join() - - # # For single processor use, for testing purposes - # for tile_id in tile_id_list: - # - # peatland_processing.create_peat_mask_tiles(tile_id) - - output_pattern = output_pattern_list[0] - processes = 50 # 50 processors = XXX GB peak - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() - - uu.print_log("Uploading output files") - uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser( - description='Creates tiles of the extent of peatlands') - parser.add_argument('--tile_id_list', '-l', required=True, - help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') - parser.add_argument('--run-date', '-d', required=False, - help='Date of run. Must be format YYYYMMDD.') - args = parser.parse_args() - tile_id_list = args.tile_id_list - run_date = args.run_date - - sensit_type='std' - - # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) - - # Checks whether the tile_id_list argument is valid - tile_id_list = uu.tile_id_list_check(tile_id_list) - - mp_peatland_processing(tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..32037e98 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +markers = + rasterio: mark test as using file system or network. 
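# For illustration only (not a test from this repository): a hypothetical test showing how the
# rasterio marker registered above can be applied, so such tests can be selected with
# `pytest -m rasterio` or excluded with `pytest -m "not rasterio"`.
import numpy as np
import pytest
import rasterio
from rasterio.transform import from_origin

@pytest.mark.rasterio   # marker registered in pytest.ini: this test touches the file system
def test_peat_mask_values_are_binary(tmp_path):
    # A peat mask tile should contain only 0 (NoData/not peat) and 1 (peat)
    path = str(tmp_path / 'peat_mask_example.tif')
    with rasterio.open(path, 'w', driver='GTiff', height=2, width=2, count=1, dtype='uint8',
                       crs='EPSG:4326', nodata=0,
                       transform=from_origin(0, 0, 0.00025, 0.00025)) as dst:
        dst.write(np.array([[0, 1], [1, 0]], dtype='uint8'), 1)
    with rasterio.open(path) as src:
        assert set(np.unique(src.read(1)).tolist()).issubset({0, 1})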
+ +testpaths = test diff --git a/readme.md b/readme.md index 9176b346..6e3d09e0 100644 --- a/readme.md +++ b/readme.md @@ -1,89 +1,96 @@ -## Global forest carbon flux model +## Global forest carbon flux framework ### Purpose and scope -This model maps gross annual greenhouse gas emissions from forests, -gross carbon removals (sequestration) by forests, and the difference between them -(net flux), all between 2001 and 2021. -Gross emissions includes CO2, NH4, and N20 and all carbon pools (abovegroung biomass, belowground biomass, +This framework maps gross greenhouse gas emissions from forests, +gross carbon removals (sequestration) by forests, and the difference between them (net flux), all between 2001 and 2022. +Gross emissions includes CO2, NH4, and N20 and all carbon pools (aboveground biomass, belowground biomass, dead wood, litter, and soil), and gross removals includes removals into aboveground and belowground biomass carbon. -Although the model is run for all tree canopy densities (per Hansen et al. 2013), it is most relevant to -pixels with canopy density >30% in 2000 or pixels which subsequently had tree cover gain (per Hansen et al. 2013). -It covers planted forests in most of the world, mangroves, and non-mangrove natural forests, and excludes palm oil plantations that existed more than 20 years ago. -It essentially spatially applies IPCC national greenhouse gas inventory rules (2016 guidelines) for forests. -It covers only forests converting to non-forests, non-forests converted to forests and forests remaining forests (no other land -use transitions). The model is described and published in Harris et al. (2021) Nature Climate Change -"Global maps of twenty-first century forest carbon fluxes" (https://www.nature.com/articles/s41558-020-00976-6). -Although the published model covered 2001-2019, the same methods were used to update the model to include 2021. +Although the framework is run for all tree canopy densities in 2000 (per Hansen et al. 2013), it is most relevant to +pixels with canopy density >30% in 2000 or pixels which subsequently had tree cover gain (per Potapov et al. 2022). +In addition to natural terrestrial forests, it also covers planted forests in most of the world, mangroves, and non-mangrove natural forests. +The framework essentially spatially applies IPCC national greenhouse gas inventory rules (2016 guidelines) for forests. +It covers only forests converted to non-forests, non-forests converted to forests and forests remaining forests (no other land +use transitions). The framework is described and published in [Harris et al. (2021) Nature Climate Change +"Global maps of twenty-first century forest carbon fluxes"](https://www.nature.com/articles/s41558-020-00976-6). +Although the original manuscript covered 2001-2019, the same methods were used to update the framework to include 2022, +with a few changes to some input layers and constants. You can read about the changes since publication +[here](https://www.globalforestwatch.org/blog/data-and-research/whats-new-carbon-flux-monitoring). ### Inputs -Well over twenty inputs are needed to run this model. Most are spatial, but some are tabular. +Well over twenty inputs are needed for this framework. Most are spatial, but some are tabular. All spatial data are converted to 10x10 degree raster tiles at 0.00025x0.00025 degree resolution -(approximately 30x30 m at the equator) before inclusion in the model. The tabular data are generally annual biomass removal (i.e. 
-sequestration) factors (e.g., mangroves, planted forests, natural forests), which are then applied to spatial data. +(approximately 30x30 m at the equator) before ingestion. Spatial data include annual tree cover loss, biomass densities in 2000, drivers of tree cover loss, -ecozones, tree cover extent in 2000, elevation, etc. Different inputs are needed for different -steps in the model. This repository includes scripts for processing all of the needed inputs. -Many inputs can be processed the same way (e.g., many rasters can be processed using the same gdal function) but some need special treatment. -The input processing scripts are scattered among almost all the folders, unfortunately, a historical legacy of how I built this out -which I haven't fixed. The data prep scripts are generally in the folder for which their outputs are most relevant. +ecozones, tree cover extent in 2000, elevation, etc. +Many inputs can be processed the same way (e.g., many rasters can be processed using the same `gdal` function) but some need special treatment. +The input processing scripts are mostly in the `data_prep` folder but a few are unfortunately in other folders. +The tabular data are generally annual biomass removal (i.e. +sequestration) factors (e.g., mangroves, planted forests, natural forests), which are then applied to spatial data. +Different inputs are needed for different steps in the framework. Inputs can either be downloaded from AWS s3 storage or used if found locally in the folder `/usr/local/tiles/` in the Docker container -(see below for more on the Docker container). -The model looks for files locally before downloading them. -The model can still be run without AWS credentials; inputs will be downloaded from s3 but outputs will not be uploaded to s3. +in which the framework runs (see below for more on the Docker container). +The framework looks for files locally before downloading them in order to reduce run time. +The framework can still be run without AWS credentials; inputs will be downloaded from s3 but outputs will not be uploaded to s3. In that case, outputs will only be stored locally. +A complete list of inputs, including changes made to the framework, can be found +[here](http://gfw2-data.s3.amazonaws.com/climate/carbon_model/Table_S3_data_sources__updated_20230406.pdf). + ### Outputs -There are three key outputs produced: gross GHG emissions, gross removals, and net flux, all totaled for 2001-2021. +There are three key outputs produced: gross GHG emissions, gross removals, and net flux, all summed per pixel for 2001-2022. These are produced at two resolutions: 0.00025x0.00025 degrees (approximately 30x30 m at the equator) in 10x10 degree rasters (to make outputs a manageable size), and 0.04x0.04 degrees (approximately 4x4km at the equator) as global rasters for static maps. -Model runs also automatically generate a txt log. This log includes nearly everything that is output in the console. -This log is useful for documenting model runs and checking for mistakes/errors in retrospect, although it does not capture errors that terminate the model. -For example, users can examine it to see if the correct input tiles were downloaded or if the intended tiles were used during the model run. +Framework runs also automatically generate a .txt log. This log includes nearly everything that is output in the console. +This log is useful for documenting framework runs and checking for mistakes/errors in retrospect, +although it does not capture errors that terminate runs. 
+For example, users can examine it to see if the correct input tiles were downloaded or if the intended tiles were used when running the framework. -Output rasters and model logs are uploaded to s3 unless the `--no-upload` flag (`-nu`) is activated as a command line argument +Output rasters and logs are uploaded to s3 unless the `--no-upload` flag (`-nu`) is activated as a command line argument or no AWS s3 credentials are supplied to the Docker container. -When either of these happens, neither raster outputs nor logs are uploaded to s3. This is good for local test runs or versions -of the model that are independent of s3 (that is, inputs are stored locally and no on s3, and the user does not have -a connection to s3 storage or s3 credentials). +This is good for local test runs or versions of the framework that are independent of s3 +(that is, inputs are stored locally and not on s3, and the user does not have a connection to s3 storage or s3 credentials). #### 30-m output rasters -The 30-m outputs are used for zonal statistics analyses (i.e. emissions, removals, or net in polygons of interest) +The 30-m outputs are used for zonal statistics (i.e. emissions, removals, or net flux in polygons of interest) and mapping on the Global Forest Watch web platform or at small scales (where 30-m pixels can be distinguished). -Individual emissions can be assigned years based on Hansen loss during further analyses -but removals and net flux are cumulative over the entire model run and cannot be assigned specific years. -This 30-m output is in megagrams (Mg) CO2e/ha 2001-2021 (i.e. densities) and includes all tree cover densities ("full extent"): -`(((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations)`. -However, the model is designed to be used specifically for forests, so the model creates three derivative 30-m -outputs for each key output (gross emissions, gross removals, net flux) as well -(only for the standard model, not for sensitivity analyses): - -1) Per pixel values for the full model extent (all tree cover densities): - `(((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations)` -2) Per hectare values for forest pixels only (colloquially, TCD>30 or Hansen gain pixels): +Individual emissions pixels can be assigned specific years based on Hansen loss during further analyses +but removals and net flux are cumulative over the entire framework run and cannot be assigned specific years. +This 30-m output is in megagrams (Mg) CO2e/ha 2001-2022 (i.e. densities) and includes all tree cover densities ("full extent"): +`((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0)`. +However, the framework is designed to be used specifically for forests, so the framework creates three derivative 30-m +outputs for each key output (gross emissions, gross removals, net flux) as well (only for the standard version, not for sensitivity analyses). +To that end, the "forest extent" rasters also have pre-2000 oil palm plantations in Indonesia and Malaysia removed +from them because carbon emissions and removals in those pixels would represent agricultural/tree crop emissions, +not forest/forest loss. 
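As a rough illustration of how these extent definitions translate into pixel masks, the sketch below applies
the boolean rules to a handful of hypothetical pixels. Array names and values are placeholders, not the
framework's own variables, and the pre-2000 plantation removal is not shown.

```python
import numpy as np

# Hypothetical 30-m inputs for a few pixels (illustrative values only)
tcd_2000     = np.array([0, 25, 45, 80, 10, 0])   # % tree canopy density in 2000
whrc_agb     = np.array([0, 12, 60, 150, 5, 0])   # aboveground biomass in 2000, Mg/ha
hansen_gain  = np.array([0, 0, 0, 0, 1, 0])       # tree cover gain flag
mangrove_agb = np.array([0, 0, 0, 0, 0, 30])      # mangrove biomass in 2000, Mg/ha

# "Full extent": any canopy density, plus gain and mangrove pixels
full_extent = ((tcd_2000 > 0) & (whrc_agb > 0)) | (hansen_gain == 1) | (mangrove_agb > 0)

# "Forest extent": same logic but TCD > 30 (pre-2000 plantation removal not shown here)
forest_extent = ((tcd_2000 > 30) & (whrc_agb > 0)) | (hansen_gain == 1) | (mangrove_agb > 0)

print(full_extent.astype(int))    # [0 1 1 1 1 1]
print(forest_extent.astype(int))  # [0 0 1 1 1 1]
```

The forest extent is necessarily a subset of the full extent, since the gain and mangrove terms are identical
and TCD>30 implies TCD>0.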
+ +1) Mg CO2e per pixel values for the full extent (all tree cover densities): + `((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0)` +2) Mg CO2e per hectare values for forest pixels only (colloquially, TCD>30 or Hansen gain pixels): `(((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations)` -3) Per pixel values for forest pixels only (colloquially, TCD>30 or Hansen gain pixels): +3) Mg CO2e per pixel values for forest pixels only (colloquially, TCD>30 or Hansen gain pixels): `(((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations)` The per hectare outputs are used for making pixel-level maps (essentially showing emission and removal factors), while the per pixel outputs are used for getting total values within areas because the values -of those pixels can be summed within areas of interest. The per pixel maps are `per hectare * pixel area/10000`. +of those pixels can be summed within areas of interest. The per pixel maps are calculated by `per hectare * pixel area/10000`. (The pixels of the per hectare outputs should not be summed but they can be averaged in areas of interest.) -Statistics from this model should always be based on the "forest extent" rasters, not the "full extent" rasters. -The full model extent outputs should generally not be used but are created by the model in case they are needed. +Statistics from this framework should always be based on the "forest extent" rasters, not the "full extent" rasters. +The full extent outputs should generally not be used but are created by the framework in case they are needed. -In addition to these three key outputs, there are many intermediate output rasters from the model, +In addition to these three key outputs, there are many intermediate output rasters from the framework, some of which may be useful for QC, analyses by area of interest, or other purposes. All of these are at 0.00025x0.00025 degree resolution and reported as per hectare values (as opposed to per pixel values), if applicable. Intermediate outputs include the annual aboveground and belowground biomass removal rates for all kinds of forests, the type of removal factor applied to each pixel, the carbon pool densities in 2000, carbon pool densities in the year of tree cover loss, and the number of years in which removals occurred. -Almost all model output have metadata associated with them, viewable using the `gdalinfo` command line utility (https://gdal.org/programs/gdalinfo.html). -Metadata includes units, date created, model version, geographic extent, and more. Unfortunately, the metadata are not viewable +Almost all framework outputs have metadata associated with them, +viewable using the `gdalinfo` command line utility (https://gdal.org/programs/gdalinfo.html). +Metadata includes units, date created, framework version, geographic extent, and more. Unfortunately, the metadata are not viewable when looking at file properties in ArcMap or in the versions of these files downloadable from the Global Forest Watch Open Data Portal (https://data.globalforestwatch.org/). @@ -96,31 +103,36 @@ per pixel 30-m rasters, not the "full extent" 30-m rasters. They should not be u #### A note on signs Although gross emissions are traditionally given positive (+) values and -gross removals are traditionally given negative (-) values, the 30-m gross removals rasters are positive, while the 4-km gross removals rasters are negative.
+gross removals are traditionally given negative (-) values, +the 30-m gross removals rasters are positive, while the 4-km gross removals rasters are negative. Net flux at both scales can be positive or negative depending on the balance of emissions and removals in the area of interest (negative for net sink, positive for net source). -### Running the model -The model runs from the command line inside a Linux Docker container. -Once you have Docker configured on your system, have cloned this repository, -and have configured access to AWS (if desired, or have the input files stored in the correct local folder), -you will be able to run the model. +### Running the framework +The framework runs from the command line inside a Linux Docker container. +Once you have Docker configured on your system (download from Docker website), +have cloned this repository (on the command line in the folder you want to clone to, `git clone https://github.com/wri/carbon-budget`), +and have configured access to AWS (if desired), you will be able to run the framework. +You can run the framework anywhere that the Docker container can be launched. That includes local computers (good for +running test areas) and AWS ec2 instances (good for larger areas/global runs). -There are two ways to run the model: as a series of individual scripts, or from a master script, which runs the individual scripts sequentially. -Which one to use depends on what you are trying to do. Generally, the individual scripts (which correspond to specific model stages) are +There are two ways to run the framework: as a series of individual scripts, or from a master script, which runs the individual scripts sequentially. +Which one to use depends on what you are trying to do. +Generally, the individual scripts (which correspond to specific framework stages) are more appropriate for development and testing, while the master script is better for running -the main part of the model from start to finish in one go. In either case, the code must be cloned from this repository -(on the command line in the folder you want to clone to, `git clone https://github.com/wri/carbon-budget`). -Run globally, both options iterate through a list of ~275 10 x 10 degree tiles. (Different model stages have different numbers of tiles.) -Run all tiles in the model extent fully through one model stage before starting on the next stage. -(The master script does this automatically.) If a user wants to run the model on just one or a few tiles, +the main part of the framework from start to finish in one go. +Run globally, both options iterate through a list of ~275 10 x 10 degree tiles. (Different framework stages have different numbers of tiles.) +Run all tiles in the framework extent fully through one framework stage before starting on the next stage. +(The master script does this automatically.) If a user wants to run the framework on just one or a few tiles, that can be done through a command line argument (`--tile-id-list` or `-l`). If individual tiles are listed, only those will be run. This is a natural system for testing or for -running the model for individual countries. You can see the tile boundaries in pixel_area_tile_footprints.zip. -For example, to run the model for Madagascar, only tiles 10S_040E, 10S_050E, and 20S_040E need to be run and the +running the framework for smaller areas. You can see the tile boundaries in `pixel_area_tile_footprints.zip` in this repo. 
+For example, to run the framework for Madagascar, only tiles 10S_040E, 10S_050E, and 20S_040E need to be run and the command line argument would be `-l 10S_040E,10S_050E,20S_040E`. +#### Building the Docker container + You can do the following on the command line in the same folder as the repository on your system. This will enter the command line in the Docker container @@ -129,14 +141,14 @@ In my setup, `C:/GIS/Carbon_model/test_tiles/docker_output/` on my computer is m the Docker container in `docker-compose.yaml`. If running on another computer, you will need to change the local folder being mapped in `docker-compose.yaml` to match your computer's directory structure. I do this for development and testing. -If you want the model to be able to download from and upload to s3, you will also need to provide +If you want the framework to be able to download from and upload to s3, you will also need to provide your own AWS secret key and access key as environment variables (`-e`) in the `docker-compose run` command: `docker-compose build` `docker-compose run --rm -e AWS_SECRET_ACCESS_KEY=... -e AWS_ACCESS_KEY_ID=... carbon-budget` -If you don't have AWS credentials, you can still run the model in the docker container but uploads will +If you don't have AWS credentials, you can still run the framework in the docker container but uploads will not occur. In this situation, you need all the basic input files for all tiles in the docker folder `/usr/local/tiles/` on your computer: @@ -144,38 +156,37 @@ on your computer: `docker-compose run --rm carbon-budget` -For runs on an AWS r5d spot machine (for full model runs), use `docker build`. -You need to supply AWS credentials for the model to work because otherwise you won't be able to get -output tiles off of the spot machine. +For runs on an AWS r5d ec2 instance (for full framework runs), use `docker build`. +You need to supply AWS credentials for the framework to work because otherwise you won't be able to get +output tiles off of the spot machine and you will lose your outputs when you terminate the spot machine. `docker build . -t gfw/carbon-budget` `docker run --rm -it -e AWS_SECRET_ACCESS_KEY=... -e AWS_ACCESS_KEY_ID=... gfw/carbon-budget` -Before doing a model run, confirm that the dates of the relevant input and output s3 folders are correct in `constants_and_names.py`. +Before doing a framework run, confirm that the dates of the relevant input and output s3 folders are correct in `constants_and_names.py`. Depending on what exactly the user is running, the user may have to change lots of dates in the s3 folders or change none. -Unfortunately, I can't really give better guidance than that; it really depends on what part of the model is being run and how. +Unfortunately, I can't really give better guidance than that; it really depends on what part of the framework is being run and how. (I want to make the situations under which users change folder dates more consistent eventually.) -The model can be run either using multiple processors or one processor. The former is for large scale model runs, -while the latter is for model development or running on small-ish countries that use only a few tiles. -The user can switch between these two versions by commenting out -the appropriate code chunks in each script. The single-processor option is commented out by default. +The framework can be run either using multiple processors or one processor. 
The former is for large scale framework runs, +while the latter is for framework development or running on small-ish countries that use only a few tiles. +The user can limit use to just one processor with the `-sp` command line flag. One important thing to note is that if a user tries to use too many processors, the system will run out of memory and -can crash (particularly on AWS EC2 instances). Thus, it is important not to use too many processors at once. -Generally, the limitation in running the model is the amount of memory available on the system rather than the number of processors. +can crash (particularly on AWS ec2 instances). Thus, it is important not to use too many processors at once. +Generally, the limitation in running the framework is the amount of memory available on the system rather than the number of processors. Each script has been somewhat calibrated to use a safe number of processors for an r5d.24xlarge EC2 instance, and often the number of processors being used is 1/2 or 1/3 of the actual number available. If the tiles were smaller (e.g., 1x1 degree), more processors could be used but then there'd also be more tiles to process, so I'm not sure that would be any faster. -Users can track memory usage in realtime using the `htop` command line utility in the Docker container. +Users can track memory usage in real time using the `htop` command line utility in the Docker container. #### Individual scripts -The flux model is comprised of many separate scripts (or stages), each of which can be run separately and -has its own inputs and output(s). Combined, these comprise the flux model. There are several data preparation -scripts, several for the removals (sequestration/gain) model, a few to generate carbon pools, one for calculating -gross emissions, one for calculating net flux, one for aggregating key results into coarser -resolution rasters for mapping, and one for creating per-pixel and forest-extent outputs (supplementary outputs). +The flux framework comprises many separate scripts (or stages), each of which can be run separately and +has its own inputs and output(s). There are several data preparation +scripts, several for the removals (sequestration/gain) framework, a few to generate carbon pools, one for calculating +gross emissions, one for calculating net flux, and one for creating derivative outputs +(aggregating key results into coarser resolution rasters for mapping and creating per-pixel and forest-extent outputs). Each script really has two parts: its `mp_` (multiprocessing) part and the part that actually does the calculations on each 10x10 degree tile. The `mp_` scripts (e.g., `mp_create_model_extent.py`) are the ones that are run. They download input files, @@ -184,145 +195,167 @@ then initiate the actual work done on each tile in the script without the `mp_` The order in which the individual stages must be run is very specific; many scripts depend on the outputs of other scripts. Looking at the files that must be downloaded for the script to run will show what files must already be created and therefore what scripts must have already been -run. Alternatively, you can look at the top of `run_full_model.py` to see the order in which model stages are run. +run. Alternatively, you can look at the top of `run_full_model.py` to see the order in which framework stages are run. The date component of the output directory on s3 generally must be changed in `constants_and_names.py` for each output file.
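+As a rough illustration of the `mp_`/per-tile split described above, here is a minimal sketch of the general pattern:
+a pool of workers mapping a per-tile function over the tile id list. The function and variable names below are
+hypothetical placeholders for illustration only, not the actual functions in this repository.
+
+```python
+from multiprocessing import Pool
+
+def process_tile(tile_id):
+    """Hypothetical per-tile worker; a real stage would call the matching
+    non-mp_ module's function to do the raster work for this 10x10 degree tile."""
+    print(f'Processing {tile_id}')
+
+if __name__ == '__main__':
+    tile_id_list = ['00N_000E', '00N_110E']  # e.g., parsed from the --tile-id-list/-l argument
+    processes = 2  # kept well below the number of available processors to avoid running out of memory
+    with Pool(processes) as pool:
+        pool.map(process_tile, tile_id_list)
+```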
-##### Running the emissions model -The gross emissions script is the only part of the model that uses C++. Thus, it must be manually compiled before running. -There are a few different versions of the emissions script: one for the standard model and a few other for -sensitivity analyses. -The command for compiling the C++ script is (subbing in the actual file name): +Stages are run from the project folder as Python modules: `/usr/local/app# python -m [folder.script] [arguments]` + +For example: + +Extent stage: `/usr/local/app# python -m data_prep.mp_model_extent -l 00N_000E -t std -nu` + +Carbon pool creation stage: `/usr/local/app# python -m carbon_pools.mp_create_carbon_pools -l 00N_000E,10S_050W -t std -ce loss -d 20239999` + +##### Running the emissions stage +The gross emissions script is the only part of the framework that uses C++. Thus, the appropriate version of the C++ +emissions file must be compiled for emissions to run. +There are a few different versions of the emissions C++ script: one for the standard version and a few others for +sensitivity analyses. +`mp_calculate_gross_emissions.py` will compile the correct C++ file each time it is run, so the C++ file does not +need to be compiled manually. +However, for completeness, the command for compiling the C++ script is (subbing in the actual file name): `c++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_[VERSION].cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_[VERSION].exe -lgdal` -For the standard model and the sensitivity analyses that don't specifically affect emissions, it is: +For the standard framework and the sensitivity analyses that don't specifically affect emissions, it is: `c++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal` +`mp_calculate_gross_emissions.py` can also be used to calculate emissions from soil only. +This is set by the `-p` argument: `biomass_soil` or `soil_only`. + +Emissions stage: `/usr/local/app# python -m emissions.mp_calculate_gross_emissions -l 30N_090W,10S_010E -t std -p biomass_soil -d 20239999` + #### Master script -The master script runs through all of the non-preparatory scripts in the model: some removal factor creation, gross removals, carbon -pool generation, gross emissions, net flux, aggregation, and supplementary output creation. -It includes all the arguments needed to run -every script. Thus, the table below also explains the potential arguments for the individual model stages. -The user can control what model components are run to some extent and set the date part of -the output directories. The emissions C++ code has to be be compiled before running the master script (see below). +The master script runs through all of the non-preparatory scripts in the framework: some removal factor creation, gross removals, carbon +pool generation, gross emissions for biomass+soil, gross emissions for soil only, +net flux, aggregation, and derivative output creation. +It includes all the arguments needed to run every script. +Thus, the table below also explains the potential arguments for the individual framework stages. +The user can control what framework components are run to some extent and set the date part of +the output directories. The order in which the arguments are used does not matter (does not need to match the table below).
Preparatory scripts like creating soil carbon tiles or mangrove tiles are not included in the master script because -they are run very infrequently. +they are run very infrequently. | Argument | Short argument | Required/Optional | Relevant stage | Description | | -------- | ----- | ----------- | ------- | ------ | -| `model-type` | `-t` | Required | All | Standard model (`std`) or a sensitivity analysis. Refer to `constants_and_names.py` for valid list of sensitivity analyses. | -| `stages` | `-s` | Required | All | The model stage at which the model should start. `all` will run the following stages in this order: model_extent, forest_age_category_IPCC, annual_removals_IPCC, annual_removals_all_forest_types, gain_year_count, gross_removals_all_forest_types, carbon_pools, gross_emissions, net_flux, aggregate, create_supplementary_outputs | +| `model-type` | `-t` | Required | All | Standard version (`std`) or a sensitivity analysis. Refer to `constants_and_names.py` for valid list of sensitivity analyses. | +| `stages` | `-s` | Required | All | The framework stage at which the run should start. `all` will run the following stages in this order: model_extent, forest_age_category_IPCC, annual_removals_IPCC, annual_removals_all_forest_types, gain_year_count, gross_removals_all_forest_types, carbon_pools, gross_emissions_biomass_soil, gross_emissions_soil_only, net_flux, create_derivative_outputs | +| `tile-id-list` | `-l` | Required | All | List of tile ids to use in the framework. Should be of form `00N_110E` or `00N_110E,00N_120E` or `all` | | `run-through` | `-r` | Optional | All | If activated, run stage provided in `stages` argument and all following stages. Otherwise, run only stage in `stages` argument. Activated with flag. | -| `run-date` | `-d` | Required | All | Date of run. Must be format YYYYMMDD. This sets the output folder in s3. | -| `tile-id-list` | `-l` | Required | All | List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all | -| `no-upload` | `-nu` | Optional | All | No files are uploaded to s3 during or after model run (including logs and model outputs). Use for testing to save time. When AWS credentials are not available, upload is automatically disabled and this flag does not have to be manually activated. | -| `save-intermdiates` | `-si`| Optional | `run_full_model.py` | Intermediate outputs are not deleted within `run_full_model.py`. Use for local model runs. If uploading to s3 is not enabled, intermediate files are automatically saved. | +| `run-date` | `-d` | Optional | All | Date of run. Must be format YYYYMMDD. This sets the output folder in s3. | +| `no-upload` | `-nu` | Optional | All | No files are uploaded to s3 during or after framework run (including logs and framework outputs). Use for testing to save time. When AWS credentials are not available, upload is automatically disabled and this flag does not have to be manually activated. | +| `single-processor` | `-sp` | Optional | All | Tile processing will be done without `multiprocessing` module whenever possible, i.e. no parallel processing. Use for testing. | | `log-note` | `-ln`| Optional | All | Adds text to the beginning of the log | | `carbon-pool-extent` | `-ce` | Optional | Carbon pool creation | Extent over which carbon pools should be calculated: loss or 2000 or loss,2000 or 2000,loss | -| `pools-to-use` | `-p` | Optional | Emissions| Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil. 
| -| `tcd-threshold` | `-tcd`| Optional | Aggregation | Tree cover density threshold above which pixels will be included in the aggregation. Defaults to 30. | -| `std-net-flux-aggreg` | `-std` | Optional | Aggregation | The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map. | +| `std-net-flux-aggreg` | `-std` | Optional | Aggregation | The s3 standard framework net flux aggregated tif, for comparison with the sensitivity analysis map. | +| `save-intermdiates` | `-si`| Optional | `run_full_model.py` | Intermediate outputs are not deleted within `run_full_model.py`. Use for local framework runs. If uploading to s3 is not enabled, intermediate files are automatically saved. | | `mangroves` | `-ma` | Optional | `run_full_model.py` | Create mangrove removal factor tiles as the first stage. Activate with flag. | | `us-rates` | `-us` | Optional | `run_full_model.py` | Create US-specific removal factor tiles as the first stage (or second stage, if mangroves are enabled). Activate with flag. | -These are some sample commands for running the flux model in various configurations. You wouldn't necessarily want to use all of these; -they simply illustrate different configurations for the command line arguments. - -Run 00N_000E in standard model; save intermediate outputs; upload outputs to s3; run all model stages; -starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil: +These are some sample commands for running the flux framework in various configurations. You wouldn't necessarily want to use all of these; +they simply illustrate different configurations for the command line arguments. +Like the individual framework stages, the full framework run script is also run from the project folder with the `-m` flag. 
-`python run_full_model.py -si -t std -s all -r -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test"` +Run: standard version; save intermediate outputs; run framework from annual_removals_IPCC; +upload to folder with date 20239999; run 00N_000E; get carbon pools at time of loss; add a log note; +use multiprocessing (implicit because no `-sp` flag); only run listed stage (implicit because no `-r` flag) -Run 00N_110E in standard model; save intermediate outputs; don't upload outputs to s3; -start at forest_age_category_IPCC step; run all stages after that; get carbon pools at time of loss; emissions from biomass and soil: +`python -m run_full_model -t std -si -s annual_removals_IPCC -d 20239999 -l 00N_000E -ce loss -ln "00N_000E test"` -`python run_full_model.py -si -nu -t std -s forest_age_category_IPCC -r -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test"` +Run: standard version; save intermediate outputs; run framework from annual_removals_IPCC; run all subsequent framework stages; +do not upload outputs to s3; run 00N_000E; get carbon pools at time of loss; add a log note; +use multiprocessing (implicit because no `-sp` flag) -Run 00N_000E and 00N_110E in standard model; don't save intermediate outputs; do upload outputs to s3; -run model_extent step; don't run sunsequent steps (no `-r` flag); run mangrove step beforehand: +`python -m run_full_model -t std -si -s annual_removals_IPCC -r -nu -l 00N_000E -ce loss -ln "00N_000E test"` -`python run_full_model.py -t std -s model_extent -d 20229999 -l 00N_000E,00N_110E -ma -ln "Two tile test"` +Run: standard version; save intermediate outputs; run framework from the beginning; run all framework stages; +upload to folder with date 20239999; run 00N_000E; get carbon pools at time of loss; add a log note; +use multiprocessing (implicit because no `-sp` flag) -Run 00N_000E, 00N_110E, and 30N_090W in standard model; save intermediate outputs; do upload outputs to s3; -start at gross_emissions step; run all stages after that; emissions from soil only: +`python -m run_full_model -t std -si -s all -r -d 20239999 -l 00N_000E -ce loss -ln "00N_000E test"` +Run: standard version; save intermediate outputs; run framework from the beginning; run all framework stages; +upload to folder with date 20239999; run 00N_000E, 10N_110E, and 50N_080W; get carbon pools at time of loss; +add a log note; use multiprocessing (implicit because no `-sp` flag) -FULL STANDARD MODEL RUN: Run all tiles in standard model; save intermediate outputs; do upload outputs to s3; -run all model stages; starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil: +`python -m run_full_model -t std -si -s all -r -d 20239999 -l 00N_000E,10N_110E,50N_080W -ce loss -ln "00N_000E test"` +Run: standard version; run framework from the beginning; run all framework stages; +do not upload outputs to s3 but set the run date to 20239999; run 00N_000E and 00N_010E; get carbon pools at time of loss; +use a single processor (`-sp` flag); add a log note; do not save intermediate outputs (implicit because no `-si` flag) -Run three tiles in biomass_swap sensitivity analysis; don't upload intermediates (forces saving of intermediate outputs); -run model_extent stage; don't continue after that stage (no run-through); get
carbon pools at time of loss; emissions from biomass and soil; -compare aggregated outputs to specified file (although not used in this specific launch because only the first step runs): +`python -m run_full_model -t std -s all -r -nu -d 20239999 -l 00N_000E,00N_010E -ce loss -sp -ln "Two tile test"` -`python run_full_model.py -nu -t biomass_swap -s model_extent -r false -d 20229999 -l 00N_000E,00N_110E,40N_90W -ce loss -p biomass_soil -tcd 30 -sagg s3://gfw2-data/climate/carbon_model/0_04deg_output_aggregation/biomass_soil/standard/20200914/net_flux_Mt_CO2e_biomass_soil_per_year_tcd30_0_4deg_modelv1_2_0_std_20200914.tif -ln "Multi-tile test"` +FULL STANDARD FRAMEWORK RUN: standard framework; save intermediate outputs; run framework from the beginning; run all framework stages; +run all tiles; get carbon pools at time of loss; add a log note; +upload outputs to s3 with dates specified in `constants_and_names.py` (implicit because no -nu flag); +use multiprocessing (implicit because no -sp flag) +`python -m run_full_model -t std -si -s all -r -l all -ce loss -ln "Running all tiles"` ### Sensitivity analysis -Several variations of the model are included; these are the sensitivity variants, as they use different inputs or parameters. +NOT SUPPORTED AT THIS TIME. + +Several variations of the framework are included; these are the sensitivity variants, as they use different inputs or parameters. They can be run by changing the `--model-type` (`-t`) argument from `std` to an option found in `constants_and_names.py`. -Each sensitivity analysis variant starts at a different stage in the model and runs to the final stage, +Each sensitivity analysis variant starts at a different stage in the framework and runs to the final stage, except that sensitivity analyses do not include the creation of the supplementary outputs (per pixel tiles, forest extent tiles). Some use all tiles and some use a smaller extent. | Sensitivity analysis | Description | Extent | Starting stage | | -------- | ----------- | ------ | ------ | -| `std` | Standard model | Global | `mp_model_extent.py` | +| `std` | Standard framework | Global | `mp_model_extent.py` | | `maxgain` | Maximum number of years of gain (removals) for gain-only and loss-and-gain pixels | Global | `gain_year_count_all_forest_types.py` | | `no_shifting_ag` | Shifting agriculture driver is replaced with commodity-driven deforestation driver | Global | `mp_calculate_gross_emissions.py` | -| `convert_to_grassland` | Forest is assumed to be converted to grassland instead of cropland in the emissions model| Global | `mp_calculate_gross_emissions.py` | +| `convert_to_grassland` | Forest is assumed to be converted to grassland instead of cropland in the emissions framework| Global | `mp_calculate_gross_emissions.py` | | `biomass_swap` | Uses Saatchi 1-km AGB map instead of Baccini 30-m map for starting carbon densities | Extent of Saatchi map, which is generally the tropics| `mp_model_extent.py` | | `US_removals` | Uses IPCC default removal factors for the US instead of US-specific removal factors from USFS FIA | Continental US | `mp_annual_gain_rate_AGC_BGC_all_forest_types.py` | | `no_primary_gain` | Primary forests and IFLs are assumed to not have any removals| Global | `mp_forest_age_category_IPCC.py` | | `legal_Amazon_loss` | Uses Brazil's PRODES annual deforestation system instead of Hansen loss | Legal Amazon| `mp_model_extent.py` | -| `Mekong_loss` | Uses Hansen loss v2.0 (multiple loss in same pixel). 
NOTE: Not used for flux model v1.2.0, so this is not currently supported. | Mekong region | N/A | +| `Mekong_loss` | Uses Hansen loss v2.0 (multiple loss in same pixel). NOTE: Not used for flux framework v1.2.0, so this is not currently supported. | Mekong region | N/A | -### Updating the model with new tree cover loss -For the current general configuration of the model, these are the changes that need to be made to update the -model with a new year of tree cover loss data. In the order in which the changes would be needed for rerunning the model: +### Updating the framework with new tree cover loss +For the current general configuration of the framework, these are the changes that need to be made to update the +framework with a new year of tree cover loss data. In the order in which the changes would be needed for rerunning the framework: -1) Update the model version variable `version` in `constants_and_names.py`. +1) Update the framework version variable `version` in `constants_and_names.py`. 2) Change the tree cover loss tile source to the new tree cover loss tiles in `constants_and_names.py`. Change the tree cover loss tile pattern in `constants_and_names.py`. 3) Change the number of loss years variable `loss_years` in `constants_and_names.py`. -4) In `constants.h` (emissions/cpp_util/), change the number of model years (`int model_years`) and the loss tile pattern (`char lossyear[]`). - -5) In `equations.cpp` (emissions/cpp_util/), change the number of model years (`int model_years`). +4) In `constants.h` (emissions/cpp_util/), change the number of framework years (`int model_years`) + and the loss tile pattern (`char lossyear[]`). -6) Make sure that changes in forest age category produced by `mp_forest_age_category_IPCC.py` - and the number of gain years produced by `mp_gain_year_count_all_forest_types.py` still make sense. +5) In `equations.cpp` (emissions/cpp_util/), change the number of framework years (`int model_years`). -7) Obtain and pre-process the updated drivers of tree cover loss model in `mp_prep_other_inputs.py` - (comment out everything except the drivers lines). Note that the drivers map probably needs to be reprojected to WGS84 - and resampled (0.005x0.005 deg) in ArcMap or similar before processing into 0.00025x0.00025 deg 10x10 tiles using this script. +6) Obtain and pre-process the updated drivers of tree cover loss and tree cover loss from fires + using `mp_prep_other_inputs_annual.py`. Note that the drivers map probably needs to be reprojected to WGS84 + and resampled (0.005x0.005 deg) in ArcMap or similar + before processing into 0.00025x0.00025 deg 10x10 tiles using this script. + `mp_prep_other_inputs_annual.py` has some additional notes about that. -8) Create a new year of burned area data using `mp_burn_year.py` (multiple changes to script needed, and potentially - some reworking if the burned area ftp site has changed its structure or download protocol). - Further instructions are at the top of `burn_date/mp_burn_year.py`. +7) Make sure that changes in forest age category produced by `mp_forest_age_category_IPCC.py` + and the number of gain years produced by `mp_gain_year_count_all_forest_types.py` still make sense. -Strictly speaking, if only the drivers, burn year, and tree cover loss are being updated, the model only needs to be -run from forest_age_category_IPCC onwards (loss affects IPCC age category but model extent isn't affected by -any of these inputs).
-However, for completeness, I suggest running all stages of the model from model_extent onwards for an update so that -model outputs from all stages have the same version in their metadata and the same dates of output as the model stages -that are actually being changed. A full model run (all tiles, all stages) takes about 18 hours on an r5d.24xlarge +Strictly speaking, if only the drivers, tree cover loss from fires, and tree cover loss are being updated, +the framework only needs to be run from forest_age_category_IPCC onwards (loss affects IPCC age category). +However, for completeness, I suggest running all stages of the framework from model_extent onwards for an update so that +framework outputs from all stages have the same version in their metadata and the same dates of output as the framework stages +that are actually being changed. A full framework run (all tiles, all stages) takes about 18 hours on an r5d.24xlarge EC2 instance with 3.7 TB of storage and 96 processors. -### Other modifications to the model -It is recommended that any changes to the model be tested in a local Docker instance before running on an EC2 instance. -I like to output files to test folders on s3 with dates 20229999 because that is clearly not a real run date. +### Other modifications to the framework +It is recommended that any changes to the framework be tested in a local Docker instance before running on an ec2 instance. +I like to output files to test folders on s3 with dates 20239999 because that is clearly not a real run date. A standard development route is: -1) Make changes to a single model script and run using the single processor option on a single tile (easiest for debugging) in local Docker. +1) Make changes to a single framework script and run using the single processor option on a single tile (easiest for debugging) in local Docker. 2) Run single script on a few representative tiles using a single processor in local Docker. @@ -331,7 +364,7 @@ A standard development route is: 4) Run the master script on a few representative tiles using multiple processor option in local Docker to confirm that changes work when using master script. -5) Run single script on a few representative tiles using multiple processors on EC2 instance (need to commit and push changes to GitHub first). +5) Run single script on a few representative tiles using multiple processors on ec2 instance (need to commit and push changes to GitHub first). 6) Run master script on all tiles using multiple processors on EC2 instance. If the changes likely affected memory usage, make sure to watch memory with `htop` to make sure that too much memory isn't required. @@ -340,14 +373,22 @@ A standard development route is: Depending on the complexity of the changes being made, some of these steps can be ommitted. Or if only a few tiles are being modeled (for a small country), only steps 1-4 need to be done. +### Running framework tests +There is an incipient testing component using `pytest`. It is currently only available for the deadwood and litter +carbon pool creation step of the framework but can be expanded to other aspects of the framework. +Tests can be run from the project folder with the command `pytest`. +You can get more verbose output with `pytest -s`. +To run tests that just have a certain flag (e.g., `rasterio`), you can do `pytest -m rasterio -s`. 
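+For orientation, a marked test might look like the minimal sketch below. The test name and the values being checked
+are hypothetical placeholders, not taken from the actual test suite; only the `rasterio` marker and the `pytest -m`
+selection mechanism come from the description above.
+
+```python
+import pytest
+
+@pytest.mark.rasterio  # selected by `pytest -m rasterio -s`
+def test_deadwood_litter_densities_are_nonnegative():
+    # Hypothetical check: deadwood and litter carbon densities (Mg C/ha) should never be negative
+    sample_densities = [0.0, 3.2, 7.5]
+    assert all(density >= 0 for density in sample_densities)
+```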
+ + ### Dependencies -Theoretically, this model should run anywhere that the correct Docker container can be started +Theoretically, this framework should run anywhere that the correct Docker container can be started and there is access to the AWS s3 bucket or all inputs are in the correct folder in the Docker container. The Docker container should be self-sufficient in that it is configured to include the right Python packages, C++ compiler, GDAL, etc. It is described in `Dockerfile`, with Python requirements (installed during Docker creation) in `requirements.txt`. -On an AWS EC2 instance, I have only run it on r5d instance types but it might be able to run on others. -At the least, it needs a certain type of memory configuration on the EC2 instance (at least one large SSD volume, I believe). -Otherwise, I do not know the limitations and constraints on running this model in an EC2 instance. +On an AWS ec2 instance, I have only run it on r5d instance types but it might be able to run on others. +At the least, it needs a certain type of memory configuration on the ec2 instance (at least one large SSD volume, I believe). +Otherwise, I do not know the limitations and constraints on running this framework in an ec2 instance. ### Contact information David Gibbs: david.gibbs@wri.org diff --git a/removals/.gitignore b/removals/.gitignore deleted file mode 100644 index c4c4ffc6..00000000 --- a/removals/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.zip diff --git a/removals/US_removal_rates.py b/removals/US_removal_rates.py index 116f2bb5..694b291c 100644 --- a/removals/US_removal_rates.py +++ b/removals/US_removal_rates.py @@ -18,7 +18,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g start = datetime.datetime.now() # Names of the input tiles - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' US_age_cat = '{0}_{1}.tif'.format(tile_id, cn.pattern_age_cat_natrl_forest_US) US_forest_group = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_forest_group_processed) US_region = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_regions_processed) @@ -51,7 +51,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g agc_bgc_stdev_dst = rasterio.open('{0}_{1}.tif'.format(tile_id, output_pattern_list[1]), 'w', **kwargs) # Adds metadata tags to the output rasters - uu.add_rasterio_tags(agc_bgc_rate_dst, 'std') + uu.add_universal_metadata_rasterio(agc_bgc_rate_dst) agc_bgc_rate_dst.update_tags( units='megagrams aboveground+belowground carbon/ha/yr') agc_bgc_rate_dst.update_tags( @@ -59,7 +59,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g agc_bgc_rate_dst.update_tags( extent='Continental USA. Applies to pixels for which an FIA region, FIA forest group, and Pan et al. 
forest age category are available or interpolated.') - uu.add_rasterio_tags(agc_bgc_stdev_dst, 'std') + uu.add_universal_metadata_rasterio(agc_bgc_stdev_dst) agc_bgc_stdev_dst.update_tags( units='standard deviation of removal factor, in megagrams aboveground+belowground carbon/ha/yr') agc_bgc_stdev_dst.update_tags( diff --git a/removals/annual_gain_rate_AGC_BGC_all_forest_types.py b/removals/annual_gain_rate_AGC_BGC_all_forest_types.py index 88702be4..a8210840 100644 --- a/removals/annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/removals/annual_gain_rate_AGC_BGC_all_forest_types.py @@ -1,46 +1,55 @@ +""" +Function to create removal factor tiles with all removal factor sources combined +""" + import datetime import numpy as np -import os import rasterio -import logging -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload): +def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list): + """ + :param tile_id: tile to be processed, identified by its tile id + :param output_pattern_list: patterns for output tile names + :return: 5 tiles: removal factor source, aboveground rate, belowground rate, aboveground+belowground rate, + standard deviation for aboveground rate (all removal factor sources combined) + Units: Mg carbon/ha/yr (including for standard deviation tiles) + """ - uu.print_log("Mapping removal rate source and AGB and BGB removal rates:", tile_id) + uu.print_log(f'Mapping removal rate source and AGB and BGB removal rates: {tile_id}') # Start time start = datetime.datetime.now() # Names of the input tiles # Removal factors - model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) - mangrove_AGB = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_AGB_mangrove) - mangrove_BGB = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_BGB_mangrove) - europe_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe) - plantations_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked) - us_AGC_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_US) - young_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_natrl_forest_young) - age_category = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_age_cat_IPCC) - ipcc_AGB_default = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGB_IPCC_defaults) + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) + mangrove_AGB = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGB_mangrove) + mangrove_BGB = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_BGB_mangrove) + europe_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe) + plantations_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked) + us_AGC_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_BGC_natrl_forest_US) + young_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_natrl_forest_young) + age_category = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_age_cat_IPCC) + ipcc_AGB_default = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, 
cn.pattern_annual_gain_AGB_IPCC_defaults) + BGB_AGB_ratio = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGB_AGB_ratio) # Removal factor standard deviations - mangrove_AGB_stdev = '{0}_{1}.tif'.format(tile_id, cn.pattern_stdev_annual_gain_AGB_mangrove) - europe_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe) - plantations_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked) - us_AGC_BGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US) - young_AGC_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young) - ipcc_AGB_default_stdev = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults) + mangrove_AGB_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGB_mangrove) + europe_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe) + plantations_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked) + us_AGC_BGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US) + young_AGC_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young) + ipcc_AGB_default_stdev = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults) # Names of the output tiles - removal_forest_type = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - annual_gain_AGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) - annual_gain_BGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2]) - annual_gain_AGC_BGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[3]) # Not used further in the model. Created just for reference. - stdev_annual_gain_AGC_all_forest_types = '{0}_{1}.tif'.format(tile_id, output_pattern_list[4]) + removal_forest_type = uu.make_tile_name(tile_id, output_pattern_list[0]) + annual_gain_AGC_all_forest_types = uu.make_tile_name(tile_id, output_pattern_list[1]) + annual_gain_BGC_all_forest_types = uu.make_tile_name(tile_id, output_pattern_list[2]) + annual_gain_AGC_BGC_all_forest_types = uu.make_tile_name(tile_id, output_pattern_list[3]) # Not used further in the model. Created just for reference. 
+ stdev_annual_gain_AGC_all_forest_types = uu.make_tile_name(tile_id, output_pattern_list[4]) # Opens biomass tile with rasterio.open(model_extent) as model_extent_src: @@ -64,56 +73,62 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens mangrove_AGB_src = rasterio.open(mangrove_AGB) mangrove_BGB_src = rasterio.open(mangrove_BGB) mangrove_AGB_stdev_src = rasterio.open(mangrove_AGB_stdev) - uu.print_log(" Mangrove tiles (AGB and BGB) for {}".format(tile_id)) - except: - uu.print_log(" No mangrove tile for {}".format(tile_id)) + uu.print_log(f' Mangrove tiles (AGB and BGB) found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Mangrove tiles (AGB and BGB) not found for {tile_id}') try: europe_AGC_BGC_src = rasterio.open(europe_AGC_BGC) europe_AGC_BGC_stdev_src = rasterio.open(europe_AGC_BGC_stdev) - uu.print_log(" Europe removal factor tile for {}".format(tile_id)) - except: - uu.print_log(" No Europe removal factor tile for {}".format(tile_id)) + uu.print_log(f' Europe removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Europe removal factor tile not found for {tile_id}') try: plantations_AGC_BGC_src = rasterio.open(plantations_AGC_BGC) plantations_AGC_BGC_stdev_src = rasterio.open(plantations_AGC_BGC_stdev) - uu.print_log(" Planted forest tile for {}".format(tile_id)) - except: - uu.print_log(" No planted forest tile for {}".format(tile_id)) + uu.print_log(f' Planted forest tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Planted forest tile not found for {tile_id}') try: us_AGC_BGC_src = rasterio.open(us_AGC_BGC) us_AGC_BGC_stdev_src = rasterio.open(us_AGC_BGC_stdev) - uu.print_log(" US removal factor tile for {}".format(tile_id)) - except: - uu.print_log(" No US removal factor tile for {}".format(tile_id)) + uu.print_log(f' US removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' US removal factor tile not found for {tile_id}') try: young_AGC_src = rasterio.open(young_AGC) young_AGC_stdev_src = rasterio.open(young_AGC_stdev) - uu.print_log(" Young forest removal factor tile for {}".format(tile_id)) - except: - uu.print_log(" No young forest removal factor tile for {}".format(tile_id)) + uu.print_log(f' Young forest removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Young forest removal factor tile not found for {tile_id}') try: age_category_src = rasterio.open(age_category) - uu.print_log(" Age category tile for {}".format(tile_id)) - except: - uu.print_log(" No age category tile for {}".format(tile_id)) + uu.print_log(f' Age category tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Age category tile not found for {tile_id}') try: ipcc_AGB_default_src = rasterio.open(ipcc_AGB_default) ipcc_AGB_default_stdev_src = rasterio.open(ipcc_AGB_default_stdev) - uu.print_log(" IPCC default removal rate tile for {}".format(tile_id)) - except: - uu.print_log(" No IPCC default removal rate tile for {}".format(tile_id)) + uu.print_log(f' IPCC default removal rate tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' IPCC default removal rate tile not found for {tile_id}') + + try: + BGB_AGB_ratio_src = rasterio.open(BGB_AGB_ratio) + uu.print_log(f' BGB:AGB tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' BGB:AGB tile not found for {tile_id}. 
Using default BGB:AGB from Mokany instead.') # Opens the output tile, giving it the arguments of the input tiles removal_forest_type_dst = rasterio.open(removal_forest_type, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(removal_forest_type_dst, sensit_type) + uu.add_universal_metadata_rasterio(removal_forest_type_dst) removal_forest_type_dst.update_tags( key='6: mangroves. 5: European-specific rates. 4: planted forests. 3: US-specific rates. 2: young (<20 year) secondary forests. 1: old (>20 year) secondary forests and primary forests. Priority goes to the highest number.') removal_forest_type_dst.update_tags( @@ -130,7 +145,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst = rasterio.open(stdev_annual_gain_AGC_all_forest_types, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_AGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_AGC_all_forest_types_dst) annual_gain_AGC_all_forest_types_dst.update_tags( units='megagrams aboveground carbon/ha/yr') annual_gain_AGC_all_forest_types_dst.update_tags( @@ -139,7 +154,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_BGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_BGC_all_forest_types_dst) annual_gain_BGC_all_forest_types_dst.update_tags( units='megagrams belowground carbon/ha/yr') annual_gain_BGC_all_forest_types_dst.update_tags( @@ -148,7 +163,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(annual_gain_AGC_BGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(annual_gain_AGC_BGC_all_forest_types_dst) annual_gain_AGC_BGC_all_forest_types_dst.update_tags( units='megagrams aboveground + belowground carbon/ha/yr') annual_gain_AGC_BGC_all_forest_types_dst.update_tags( @@ -157,7 +172,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens extent='Full model extent') # Adds metadata tags to the output raster - uu.add_rasterio_tags(stdev_annual_gain_AGC_all_forest_types_dst, sensit_type) + uu.add_universal_metadata_rasterio(stdev_annual_gain_AGC_all_forest_types_dst) stdev_annual_gain_AGC_all_forest_types_dst.update_tags( units='standard deviation for removal factor, in terms of megagrams aboveground carbon/ha/yr') stdev_annual_gain_AGC_all_forest_types_dst.update_tags( @@ -165,7 +180,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst.update_tags( extent='Full model extent') - uu.print_log(" Creating removal model forest type tile, AGC removal factor tile, BGC removal factor tile, and AGC removal factor standard deviation tile for {}".format(tile_id)) + uu.print_log(f' Creating removal model forest type tile, AGC removal factor tile, BGC removal factor tile, and AGC removal factor standard deviation tile for {tile_id}') uu.check_memory() @@ -182,9 +197,15 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens try: age_category_window = age_category_src.read(1, window=window) - except: + except UnboundLocalError: age_category_window = np.zeros((window.height, window.width), dtype='uint8') + try: + 
BGB_AGB_ratio_window = BGB_AGB_ratio_src.read(1, window=window) + except UnboundLocalError: + BGB_AGB_ratio_window = np.empty((window.height, window.width), dtype='float32') + BGB_AGB_ratio_window[:] = cn.below_to_above_non_mang + # Lowest priority try: ipcc_AGB_default_rate_window = ipcc_AGB_default_src.read(1, window=window) @@ -195,7 +216,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens # that don't have rates under this sensitivity analysis to still be included in the model. # Unfortunately, model_extent is slightly different from the IPCC rate extent (no IPCC rates where # there is no ecozone information), but this is a very small difference and not worth worrying about. - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': removal_forest_type_window = np.where(model_extent_window != 0, cn.old_natural_rank, removal_forest_type_window).astype('uint8') @@ -207,12 +228,12 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens ipcc_AGB_default_rate_window * cn.biomass_to_c_non_mangrove, annual_gain_AGC_all_forest_types_window).astype('float32') annual_gain_BGC_all_forest_types_window = np.where(ipcc_AGB_default_rate_window != 0, - ipcc_AGB_default_rate_window * cn.biomass_to_c_non_mangrove * cn.below_to_above_non_mang, + ipcc_AGB_default_rate_window * cn.biomass_to_c_non_mangrove * BGB_AGB_ratio_window, annual_gain_BGC_all_forest_types_window).astype('float32') stdev_annual_gain_AGC_all_forest_types_window = np.where(ipcc_AGB_default_stdev_window != 0, ipcc_AGB_default_stdev_window * cn.biomass_to_c_non_mangrove, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: # young_AGC_rate_window uses > because of the weird NaN in the tiles. 
If != is used, the young rate NaN overwrites the IPCC arrays @@ -228,31 +249,31 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens young_AGC_rate_window, annual_gain_AGC_all_forest_types_window).astype('float32') annual_gain_BGC_all_forest_types_window = np.where((young_AGC_rate_window > 0) & (age_category_window == 1), - young_AGC_rate_window * cn.below_to_above_non_mang, + young_AGC_rate_window * BGB_AGB_ratio_window, annual_gain_BGC_all_forest_types_window).astype('float32') stdev_annual_gain_AGC_all_forest_types_window = np.where((young_AGC_stdev_window > 0) & (age_category_window == 1), young_AGC_stdev_window, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass - if sensit_type != 'US_removals': + if cn.SENSIT_TYPE != 'US_removals': try: us_AGC_BGC_rate_window = us_AGC_BGC_src.read(1, window=window) us_AGC_BGC_stdev_window = us_AGC_BGC_stdev_src.read(1, window=window) removal_forest_type_window = np.where(us_AGC_BGC_rate_window != 0, cn.US_rank, removal_forest_type_window).astype('uint8') annual_gain_AGC_all_forest_types_window = np.where(us_AGC_BGC_rate_window != 0, - us_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang), + us_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window), annual_gain_AGC_all_forest_types_window).astype('float32') annual_gain_BGC_all_forest_types_window = np.where(us_AGC_BGC_rate_window != 0, (us_AGC_BGC_rate_window) - - (us_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang)), + (us_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window)), annual_gain_BGC_all_forest_types_window).astype('float32') stdev_annual_gain_AGC_all_forest_types_window = np.where(us_AGC_BGC_stdev_window != 0, - us_AGC_BGC_stdev_window / (1 + cn.below_to_above_non_mang), + us_AGC_BGC_stdev_window / (1 + BGB_AGB_ratio_window), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: @@ -260,16 +281,16 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens plantations_AGC_BGC_stdev_window = plantations_AGC_BGC_stdev_src.read(1, window=window) removal_forest_type_window = np.where(plantations_AGC_BGC_rate_window != 0, cn.planted_forest_rank, removal_forest_type_window).astype('uint8') annual_gain_AGC_all_forest_types_window = np.where(plantations_AGC_BGC_rate_window != 0, - plantations_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang), + plantations_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window), annual_gain_AGC_all_forest_types_window).astype('float32') annual_gain_BGC_all_forest_types_window = np.where(plantations_AGC_BGC_rate_window != 0, (plantations_AGC_BGC_rate_window ) - - (plantations_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang)), + (plantations_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window)), annual_gain_BGC_all_forest_types_window).astype('float32') stdev_annual_gain_AGC_all_forest_types_window = np.where(plantations_AGC_BGC_stdev_window != 0, - plantations_AGC_BGC_stdev_window / (1 + cn.below_to_above_non_mang), + plantations_AGC_BGC_stdev_window / (1 + BGB_AGB_ratio_window), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass try: @@ -277,19 +298,19 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens europe_AGC_BGC_stdev_window = europe_AGC_BGC_stdev_src.read(1, window=window) removal_forest_type_window = np.where(europe_AGC_BGC_rate_window != 0, cn.europe_rank, removal_forest_type_window).astype('uint8') 
annual_gain_AGC_all_forest_types_window = np.where(europe_AGC_BGC_rate_window != 0, - europe_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang), + europe_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window), annual_gain_AGC_all_forest_types_window).astype('float32') annual_gain_BGC_all_forest_types_window = np.where(europe_AGC_BGC_rate_window != 0, (europe_AGC_BGC_rate_window) - - (europe_AGC_BGC_rate_window / (1 + cn.below_to_above_non_mang)), + (europe_AGC_BGC_rate_window / (1 + BGB_AGB_ratio_window)), annual_gain_BGC_all_forest_types_window).astype('float32') # NOTE: Nancy Harris thought that the European removal standard deviations were 2x too large, # per email on 8/30/2020. Thus, simplest fix is to leave original tiles 2x too large and # correct them only where composited with other stdev sources. stdev_annual_gain_AGC_all_forest_types_window = np.where(europe_AGC_BGC_stdev_window != 0, - (europe_AGC_BGC_stdev_window/2) / (1 + cn.below_to_above_non_mang), + (europe_AGC_BGC_stdev_window/2) / (1 + BGB_AGB_ratio_window), stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass # Highest priority @@ -307,7 +328,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_window = np.where(mangroves_AGB_stdev_window != 0, mangroves_AGB_stdev_window * cn.biomass_to_c_mangrove, stdev_annual_gain_AGC_all_forest_types_window).astype('float32') - except: + except UnboundLocalError: pass # Masks outputs to model output extent @@ -325,4 +346,4 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens stdev_annual_gain_AGC_all_forest_types_dst.write_band(1, stdev_annual_gain_AGC_all_forest_types_window, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, cn.pattern_removal_forest_type, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, cn.pattern_removal_forest_type) diff --git a/removals/annual_gain_rate_IPCC_defaults.py b/removals/annual_gain_rate_IPCC_defaults.py index 58676f67..1bb145bb 100644 --- a/removals/annual_gain_rate_IPCC_defaults.py +++ b/removals/annual_gain_rate_IPCC_defaults.py @@ -1,16 +1,27 @@ +""" +Function to create removal factor tiles according to IPCC defaults +""" + import datetime import numpy as np import rasterio -import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu # Necessary to suppress a pandas error later on. 
https://github.com/numpy/numpy/issues/12987 np.set_printoptions(threshold=sys.maxsize) -def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, output_pattern_list, no_upload): +def annual_gain_rate(tile_id, gain_table_dict, stdev_table_dict, output_pattern_list): + """ + :param tile_id: tile to be processed, identified by its tile id + :param gain_table_dict: dictionary of removal factors by continent, ecozone, and age + :param stdev_table_dict: dictionary of standard deviations for removal factors by continent, ecozone, and age + :param output_pattern_list: patterns for output tile names + :return: 3 tiles: aboveground rate, belowground rate, standard deviation for aboveground rate (IPCC rates) + Units: Mg biomass/ha/yr (including for standard deviation tiles) + """ # Converts the forest age category decision tree output values to the three age categories-- # 10000: primary forest; 20000: secondary forest > 20 years; 30000: secondary forest <= 20 years @@ -19,32 +30,39 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou # The key in the dictionary is the forest age category decision tree endpoints. age_dict = {0: 0, 1: 10000, 2: 20000, 3: 30000} - uu.print_log("Creating IPCC default biomass removals rates and standard deviation for {}".format(tile_id)) + uu.print_log(f'Creating IPCC default biomass removals rates and standard deviation for {tile_id}') # Start time start = datetime.datetime.now() # Names of the forest age category and continent-ecozone tiles - age_cat = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_age_cat_IPCC) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) + age_cat = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_age_cat_IPCC) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) + BGB_AGB_ratio = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGB_AGB_ratio) # Names of the output natural forest removals rate tiles (above and belowground) - AGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - BGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) - AGB_IPCC_default_gain_stdev = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2]) + AGB_IPCC_default_gain_rate = f'{tile_id}_{output_pattern_list[0]}.tif' + BGB_IPCC_default_gain_rate = f'{tile_id}_{output_pattern_list[1]}.tif' + AGB_IPCC_default_gain_stdev = f'{tile_id}_{output_pattern_list[2]}.tif' # Opens the input tiles if they exist. Skips the tile if either input doesn't exist. try: age_cat_src = rasterio.open(age_cat) - uu.print_log(" Age category tile found for {}".format(tile_id)) - except: - return uu.print_log(" No age category tile found for {}. Skipping tile.".format(tile_id)) + uu.print_log(f' Age category tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + return uu.print_log(f' Age category tile not found for {tile_id}. Skipping tile.') try: cont_eco_src = rasterio.open(cont_eco) - uu.print_log(" Continent-ecozone tile found for {}".format(tile_id)) - except: - return uu.print_log(" No continent-ecozone tile found for {}. Skipping tile.".format(tile_id)) + uu.print_log(f' Continent-ecozone tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + return uu.print_log(f' Continent-ecozone tile not found for {tile_id}.
Skipping tile.') + + try: + BGB_AGB_ratio_src = rasterio.open(BGB_AGB_ratio) + uu.print_log(f' BGB:AGB tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' BGB:AGB tile not found for {tile_id}. Using default BGB:AGB from Mokany instead.') # Grabs metadata about the continent ecozone tile, like its location/projection/cellsize kwargs = cont_eco_src.meta @@ -65,7 +83,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou # The output files, aboveground and belowground biomass removals rates dst_above = rasterio.open(AGB_IPCC_default_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_above) dst_above.update_tags( units='megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_above.update_tags( @@ -75,7 +93,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou dst_below = rasterio.open(BGB_IPCC_default_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_below, sensit_type) + uu.add_universal_metadata_rasterio(dst_below) dst_below.update_tags( units='megagrams belowground biomass (AGB or dry matter)/ha/yr') dst_below.update_tags( @@ -85,7 +103,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou dst_stdev_above = rasterio.open(AGB_IPCC_default_gain_stdev, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_stdev_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_stdev_above) dst_stdev_above.update_tags( units='standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_stdev_above.update_tags( @@ -101,14 +119,20 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou # Creates a processing window for each input raster try: cont_eco_window = cont_eco_src.read(1, window=window) - except: + except UnboundLocalError: cont_eco_window = np.zeros((window.height, window.width), dtype='uint8') try: age_cat_window = age_cat_src.read(1, window=window) - except: + except UnboundLocalError: age_cat_window = np.zeros((window.height, window.width), dtype='uint8') + try: + BGB_AGB_ratio_window = BGB_AGB_ratio_src.read(1, window=window) + except UnboundLocalError: + BGB_AGB_ratio_window = np.empty((window.height, window.width), dtype='float32') + BGB_AGB_ratio_window[:] = cn.below_to_above_non_mang + # Recodes the input forest age category array with 10 different decision tree end values into the 3 actual age categories age_recode = np.vectorize(age_dict.get)(age_cat_window) @@ -129,7 +153,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou ## Belowground removal factors # Calculates belowground annual removal rates - gain_rate_BGB = gain_rate_AGB * cn.below_to_above_non_mang + gain_rate_BGB = gain_rate_AGB * BGB_AGB_ratio_window # Writes the output window to the output file dst_below.write_band(1, gain_rate_BGB, window=window) @@ -147,4 +171,4 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou dst_stdev_above.write_band(1, gain_stdev_AGB, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload) + uu.end_of_fx_summary(start, tile_id, output_pattern_list[0]) diff --git a/removals/annual_gain_rate_mangrove.py b/removals/annual_gain_rate_mangrove.py index 306ba6e4..752a4148 
100644 --- a/removals/annual_gain_rate_mangrove.py +++ b/removals/annual_gain_rate_mangrove.py @@ -7,14 +7,14 @@ import os import rasterio import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu # Necessary to suppress a pandas error later on. https://github.com/numpy/numpy/issues/12987 np.set_printoptions(threshold=sys.maxsize) -def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict): +def annual_gain_rate(tile_id, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict): uu.print_log("Processing:", tile_id) @@ -29,8 +29,8 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, return # Name of the input files - mangrove_biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_mangrove_biomass_2000) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) + mangrove_biomass = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_mangrove_biomass_2000) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) # Names of the output aboveground and belowground mangrove removals rate tiles AGB_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) @@ -60,7 +60,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_above = rasterio.open(AGB_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_above) dst_above.update_tags( units='megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_above.update_tags( @@ -70,7 +70,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_below = rasterio.open(BGB_gain_rate, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_below, sensit_type) + uu.add_universal_metadata_rasterio(dst_below) dst_below.update_tags( units='megagrams belowground biomass (BGB or dry matter)/ha/yr') dst_below.update_tags( @@ -80,7 +80,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, dst_stdev_above = rasterio.open(AGB_gain_stdev, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst_stdev_above, sensit_type) + uu.add_universal_metadata_rasterio(dst_stdev_above) dst_stdev_above.update_tags( units='standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr') dst_stdev_above.update_tags( diff --git a/removals/forest_age_category_IPCC.py b/removals/forest_age_category_IPCC.py index df4a40e0..f36d18c5 100644 --- a/removals/forest_age_category_IPCC.py +++ b/removals/forest_age_category_IPCC.py @@ -1,14 +1,21 @@ +""" +Function to create forest age category tiles +""" + import datetime import numpy as np -import os import rasterio -import logging -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_upload): +def forest_age_category(tile_id, gain_table_dict, pattern): + """ + :param tile_id: tile to be processed, identified by its tile id + :param gain_table_dict: dictionary of removal factors by continent, ecozone, and forest age category + :param pattern: pattern for output tile names + :return: tile denoting three broad forest age categories: 1- young (<20), 2- middle, 3- old/primary + """ uu.print_log("Assigning forest age categories:", tile_id) @@ 
-26,30 +33,23 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa tropics = 1 - uu.print_log(" Tile {} in tropics:".format(tile_id), tropics) + uu.print_log(f' Tile {tile_id} in tropics: {tropics}') # Names of the input tiles - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) - ifl_primary = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_ifl_primary) - cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed) - - # Biomass tile name depends on the sensitivity analysis - if sensit_type == 'biomass_swap': - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_JPL_unmasked_processed) - uu.print_log("Using JPL biomass tile for {} sensitivity analysis".format(sensit_type)) - else: - biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_WHRC_biomass_2000_unmasked) - uu.print_log("Using WHRC biomass tile for {} sensitivity analysis".format(sensit_type)) - - if sensit_type == 'legal_Amazon_loss': - loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - uu.print_log("Using PRODES loss tile {0} for {1} sensitivity analysis".format(tile_id, sensit_type)) - elif sensit_type == 'Mekong_loss': - loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) + ifl_primary = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_ifl_primary) + cont_eco = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_cont_eco_processed) + biomass = uu.sensit_tile_rename_biomass(cn.SENSIT_TYPE, tile_id) # Biomass tile name depends on the sensitivity analysis + + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + loss = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' + uu.print_log(f'Using PRODES loss tile {tile_id} for {cn.SENSIT_TYPE} sensitivity analysis') + elif cn.SENSIT_TYPE == 'Mekong_loss': + loss = f'{tile_id}_{cn.pattern_Mekong_loss_processed}.tif' else: - loss = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) - uu.print_log("Using Hansen loss tile {0} for {1} model run".format(tile_id, sensit_type)) + loss = f'{cn.pattern_loss}_{tile_id}.tif' + uu.print_log(f'Using Hansen loss tile {tile_id} for {cn.SENSIT_TYPE} model run') # Opens biomass tile with rasterio.open(model_extent) as model_extent_src: @@ -63,33 +63,33 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa # Opens the input tiles if they exist try: cont_eco_src = rasterio.open(cont_eco) - uu.print_log(" Continent-ecozone tile found for {}".format(tile_id)) - except: - uu.print_log(" No continent-ecozone tile found for {}".format(tile_id)) + uu.print_log(f' Continent-ecozone tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Continent-ecozone tile not found for {tile_id}') try: gain_src = rasterio.open(gain) - uu.print_log(" Gain tile found for {}".format(tile_id)) - except: - uu.print_log(" No gain tile found for {}".format(tile_id)) + uu.print_log(f' Gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gain tile not found for {tile_id}') try: biomass_src = rasterio.open(biomass) - uu.print_log(" Biomass tile found for {}".format(tile_id)) - except: - uu.print_log(" No biomass tile found for {}".format(tile_id)) + uu.print_log(f' Biomass tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + 
uu.print_log(f' Biomass tile not found for {tile_id}') try: loss_src = rasterio.open(loss) - uu.print_log(" Loss tile found for {}".format(tile_id)) - except: - uu.print_log(" No loss tile found for {}".format(tile_id)) + uu.print_log(f' Loss tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Loss tile not found for {tile_id}') try: ifl_primary_src = rasterio.open(ifl_primary) - uu.print_log(" IFL-primary forest tile found for {}".format(tile_id)) - except: - uu.print_log(" No IFL-primary forest tile found for {}".format(tile_id)) + uu.print_log(f' IFL-primary forest tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' IFL-primary forest tile not found for {tile_id}') # Updates kwargs for the output dataset kwargs.update( @@ -100,10 +100,10 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa ) # Opens the output tile, giving it the arguments of the input tiles - dst = rasterio.open('{0}_{1}.tif'.format(tile_id, pattern), 'w', **kwargs) + dst = rasterio.open(f'{tile_id}_{pattern}.tif', 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(dst, sensit_type) + uu.add_universal_metadata_rasterio(dst) dst.update_tags( key='1: young (<20 year) secondary forest; 2: old (>20 year) secondary forest; 3: primary forest or IFL') dst.update_tags( @@ -111,8 +111,7 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa dst.update_tags( extent='Full model extent, even though these age categories will not be used over the full model extent. They apply to just the rates from IPCC defaults.') - - uu.print_log(" Assigning IPCC age categories for", tile_id) + uu.print_log(f' Assigning IPCC age categories for {tile_id}') uu.check_memory() @@ -124,27 +123,27 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa try: loss_window = loss_src.read(1, window=window) - except: + except UnboundLocalError: loss_window = np.zeros((window.height, window.width), dtype='uint8') try: gain_window = gain_src.read(1, window=window) - except: + except UnboundLocalError: gain_window = np.zeros((window.height, window.width), dtype='uint8') try: cont_eco_window = cont_eco_src.read(1, window=window) - except: + except UnboundLocalError: cont_eco_window = np.zeros((window.height, window.width), dtype='uint8') try: biomass_window = biomass_src.read(1, window=window) - except: + except UnboundLocalError: biomass_window = np.zeros((window.height, window.width), dtype='float32') try: ifl_primary_window = ifl_primary_src.read(1, window=window) - except: + except UnboundLocalError: ifl_primary_window = np.zeros((window.height, window.width), dtype='uint8') # Creates a numpy array that has the <=20 year secondary forest growth rate x 20 @@ -158,11 +157,12 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa # Logic tree for assigning age categories begins here # Code 1 = young (<20 years) secondary forest, code 2 = old (>20 year) secondary forest, code 3 = primary forest # model_extent_window ensures that there is both biomass and tree cover in 2000 OR mangroves OR tree cover gain - # WITHOUT pre-2000 plantations # For every model version except legal_Amazon_loss sensitivity analysis, which has its own rules about age assignment - if sensit_type != 'legal_Amazon_loss': + #### Try using this in the future: https://gis.stackexchange.com/questions/419445/comparing-two-rasters-based-on-a-complex-set-of-rules + + if cn.SENSIT_TYPE 
!= 'legal_Amazon_loss': # No change pixels- no loss or gain if tropics == 0: @@ -179,22 +179,18 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa dst_data[np.where((model_extent_window > 0) & (gain_window == 0) & (loss_window > 0) & (ifl_primary_window ==1))] = 3 # Gain-only pixels - # If there is gain, the pixel doesn't need biomass or canopy cover. It just needs to be outside of plantations and mangroves. - # The role of model_extent_window here is to exclude the pre-2000 plantations. + # If there is gain, the pixel doesn't need biomass or canopy cover. dst_data[np.where((model_extent_window > 0) & (gain_window == 1) & (loss_window == 0))] = 1 - # Pixels with loss and gain - # If there is gain with loss, the pixel doesn't need biomass or canopy cover. It just needs to be outside of plantations and mangroves. - # The role of model_extent_window here is to exclude the pre-2000 plantations. - dst_data[np.where((model_extent_window > 0) & (gain_window == 1) & (loss_window > (cn.gain_years)))] = 1 - dst_data[np.where((model_extent_window > 0) & (gain_window == 1) & (loss_window > 0) & (loss_window <= (cn.gain_years/2)))] = 1 - dst_data[np.where((model_extent_window > 0) & (gain_window == 1) & (loss_window > (cn.gain_years/2)) & (loss_window <= cn.gain_years))] = 1 + # Pixels with loss-and-gain + # If there is gain with loss, the pixel doesn't need biomass or canopy cover. + dst_data[np.where((model_extent_window > 0) & (gain_window == 1) & (loss_window > 0))] = 1 # For legal_Amazon_loss sensitivity analysis else: # Non-loss pixels (could have gain or not. Assuming that if within PRODES extent in 2000, there can't be - # gain, so it's a faulty detection. Thus, gain-only pixels are ignored and become part of no change.) + # gain, so it's a faulty detection. Thus, gain-only pixels are ignored and become part of no-change.) 
dst_data[np.where((model_extent_window == 1) & (loss_window == 0))] = 3 # primary forest # Loss-only pixels @@ -208,4 +204,4 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa dst.write_band(1, dst_data, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/removals/gain_year_count_all_forest_types.py b/removals/gain_year_count_all_forest_types.py index 847cbf4d..5c569fa2 100644 --- a/removals/gain_year_count_all_forest_types.py +++ b/removals/gain_year_count_all_forest_types.py @@ -1,34 +1,44 @@ -from subprocess import Popen, PIPE, STDOUT, check_call +""" +Functions to create tiles with the number of years of carbon accumulation +""" + import datetime -import rasterio import numpy as np +import rasterio import os -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -# Gets the names of the input tiles -def tile_names(tile_id, sensit_type): +def tile_names(tile_id): + """ + Gets the names of the input tiles + :param tile_id: tile to be processed, identified by its tile id + :return: names of input tiles + """ # Names of the loss, gain, and model extent tiles - if sensit_type == 'legal_Amazon_loss': - loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + loss = f'{tile_id}_{cn.pattern_Brazil_annual_loss_processed}.tif' else: - loss = '{0}_{1}.tif'.format(cn.pattern_loss, tile_id) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) - model_extent = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_model_extent) + loss = f'{cn.pattern_loss}_{tile_id}.tif' + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' + model_extent = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_model_extent) return loss, gain, model_extent -# Creates gain year count tiles for pixels that only had loss -def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_only(tile_id): + """ + Creates gain year count tiles for pixels that only had loss + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that only had tree cover loss + """ - uu.print_log("Gain year count for loss only pixels:", tile_id) + uu.print_log(f'Gain year count for loss-only pixels: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -36,62 +46,75 @@ def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload): uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss only pixel gain year count.".format(tile_id)) + uu.print_log(f' Loss tile found for {tile_id}. 
Using it in loss-only pixel gain year count.') loss_calc = '--calc=(A>0)*(B==0)*(C>0)*(A-1)' - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) - loss_outfilearg = '--outfile={}'.format(loss_outfilename) + loss_outfilename = f'{tile_id}_gain_year_count_loss_only.tif' + loss_outfilearg = f'--outfile={loss_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_calc, loss_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet', + '--hideNoData'] # Need --hideNoData because the non-gain pixels are NoData, not 0. uu.log_subprocess_output_full(cmd) else: - uu.print_log("No loss tile found for {}. Skipping loss only pixel gain year count.".format(tile_id)) + uu.print_log(f' Loss tile not found for {tile_id}. Skipping loss-only pixel gain year count.') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_only') -# Creates gain year count tiles for pixels that only had gain -def create_gain_year_count_gain_only_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_gain_only_standard(tile_id): + """ + Creates gain year count tiles for pixels that only had gain (standard model only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that only had tree cover gain + """ - uu.print_log("Gain year count for gain only pixels using standard function:", tile_id) + uu.print_log(f'Gain year count for gain-only pixels using standard function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() uu.check_memory() - # Need to check if loss tile exists because the calc string is depends on the presene/absence of the loss tile - if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==0)*(B==1)*(C>0)*({}/2)'.format(cn.gain_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + # Need to check if gain tile exists. + if not os.path.exists(gain): + uu.print_log(f' Gain tile not found for {tile_id}. Skipping gain-only pixel gain year count.') + + # Need to check if loss tile exists because the calc string depends on the presence/absence of the loss tile + elif os.path.exists(loss): + uu.print_log(f' Loss tile found for {tile_id}. Using it in gain-only pixel gain year count.') + gain_calc = f'--calc=(A==0)*(B==1)*(C>0)*({cn.gain_years}/2)' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}.
Not using it for gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==1)*(B>0)*({}/2)'.format(cn.gain_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + uu.print_log(f' Loss tile not found for {tile_id}. Not using it for gain-only pixel gain year count.') + gain_calc = f'--calc=(A==1)*(B>0)*({cn.gain_years}/2)' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_gain_only', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_gain_only') -# Creates gain year count tiles for pixels that only had gain -def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): +def create_gain_year_count_gain_only_maxgain(tile_id): + """ + Creates gain year count tiles for pixels that only had gain (maximum gain year sensitivity analysis only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that only had tree cover gain + """ - uu.print_log("Gain year count for gain only pixels using maxgain function:", tile_id) + uu.print_log(f'Gain year count for gain-only pixels using maxgain function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -99,69 +122,98 @@ def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==0)*(B==1)*(C>0)*({})'.format(cn.loss_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + uu.print_log(f' Loss tile found for {tile_id}. Using it in gain-only pixel gain year count.') + gain_calc = f'--calc=(A==0)*(B==1)*(C>0)*({cn.loss_years})' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}. Not using loss for gain only pixel gain year count.".format(tile_id)) - gain_calc = '--calc=(A==1)*(B>0)*({})'.format(cn.loss_years) - gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) - gain_outfilearg = '--outfile={}'.format(gain_outfilename) + uu.print_log(f' Loss tile not found for {tile_id}. 
Not using loss for gain-only pixel gain year count.') + gain_calc = f'--calc=(A==1)*(B>0)*({cn.loss_years})' + gain_outfilename = f'{tile_id}_gain_year_count_gain_only.tif' + gain_outfilearg = f'--outfile={gain_outfilename}' cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_gain_only', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_gain_only') -# Creates gain year count tiles for pixels that had neither loss not gain. -# For all models except legal_Amazon_loss. -def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_no_change_standard(tile_id): + """ + Creates gain year count tiles for pixels that had neither loss nor gain. + For all models except legal_Amazon_loss. + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that had neither loss nor gain + """ uu.print_log("Gain year count for pixels with neither loss nor gain:", tile_id) # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() uu.check_memory() - if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in no change pixel gain year count.".format(tile_id)) - no_change_calc = '--calc=(A==0)*(B==0)*(C>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) - no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) + if os.path.exists(loss) and os.path.exists(gain): + uu.print_log(f' Loss and gain tiles found for {tile_id}. Using them in no-change pixel gain year count.') + no_change_calc = f'--calc=(A==0)*(B==0)*(C>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet', + '--hideNoData'] # Need --hideNoData because the non-gain pixels are NoData, not 0. uu.log_subprocess_output_full(cmd) - else: - uu.print_log(" No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id)) - no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) - no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) + elif os.path.exists(loss): + uu.print_log(f' Gain tile not found for {tile_id}.
Not using it for no-change pixel gain year count.') + no_change_calc = f'--calc=(A>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' + cmd = ['gdal_calc.py', '-A', loss, '-B', model_extent, no_change_calc, + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', + '--quiet', + '--hideNoData'] # Need --hideNoData because the non-gain pixels are NoData, not 0. + uu.log_subprocess_output_full(cmd) + elif os.path.exists(gain): + uu.print_log(f' Loss tile not found for {tile_id}. Not using it for no-change pixel gain year count.') + no_change_calc = f'--calc=(A==0)*(B>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet', + '--hideNoData'] # Need --hideNoData because the non-gain pixels are NoData, not 0. + uu.log_subprocess_output_full(cmd) + else: + uu.print_log(f' Loss and gain tiles not found for {tile_id}. Not using them for no-change pixel gain year count.') + no_change_calc = f'--calc=(A>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' + cmd = ['gdal_calc.py', '-A', model_extent, no_change_calc, + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet', + '--hideNoData'] # Need --hideNoData because the non-gain pixels are NoData, not 0. uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change') -# Creates gain year count tiles for pixels that did not have loss (doesn't matter if they had gain or not). -# For legal_Amazon_loss sensitivity analysis. -def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_upload): +def create_gain_year_count_no_change_legal_Amazon_loss(tile_id): + """ + Creates gain year count tiles for pixels that did not have loss (doesn't matter if they had gain or not) + For legal_Amazon_loss sensitivity analysis. + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that did not have loss + """ - uu.print_log("Gain year count for pixels without loss for legal_Amazon_loss:", tile_id) + uu.print_log(f'Gain year count for pixels without loss for legal_Amazon_loss: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -171,58 +223,65 @@ def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_ # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so I have to convert it # to a vrt so that the 0 pixels are recognized. # This was the case with PRODES loss in model v.1.1.2. 
- loss_vrt = '{}_loss.vrt'.format(tile_id) - os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss)) + loss_vrt = f'{tile_id}_loss.vrt' + os.system(f'gdalbuildvrt -vrtnodata None {loss_vrt} {loss}') - no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) - no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) + no_change_calc = f'--calc=(A==0)*(B>0)*{cn.loss_years}' + no_change_outfilename = f'{tile_id}_gain_year_count_no_change.tif' + no_change_outfilearg = f'--outfile={no_change_outfilename}' cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', model_extent, no_change_calc, no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) - + os.remove(loss_vrt) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change') -# Creates gain year count tiles for pixels that had both loss and gain -def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_and_gain_standard(tile_id): + """ + Creates gain year count tiles for pixels that had both loss-and-gain (standard model only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that had both loss-and-gain + """ - uu.print_log("Loss and gain pixel processing using standard function:", tile_id) + uu.print_log(f'Loss and gain pixel processing using standard function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() uu.check_memory() - if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss and gain pixel gain year count.".format(tile_id)) - loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({}+1-A)/2)))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) - loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) + if not os.path.exists(loss) and not os.path.exists(gain): + uu.print_log(f' Loss and gain tiles not found for {tile_id}. Skipping loss-and-gain pixel gain year count.') + else: + uu.print_log(f' Loss and gain tiles found for {tile_id}. Using them in loss-and-gain pixel gain year count.') + loss_and_gain_calc = f'--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({cn.loss_years}+1-A)/2)))' + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' + loss_and_gain_outfilearg = f'--outfile={loss_and_gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) - else: - uu.print_log(" No loss tile found for {}. 
Skipping loss and gain pixel gain year count.".format(tile_id)) - # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') -# Creates gain year count tiles for pixels that had both loss and gain -def create_gain_year_count_loss_and_gain_maxgain(tile_id, sensit_type, no_upload): +def create_gain_year_count_loss_and_gain_maxgain(tile_id): + """ + Creates gain year count tiles for pixels that had both loss-and-gain (maxgain sensitivity model only) + :param tile_id: tile to be processed, identified by its tile id + :return: tile with number of years of carbon accumulation in pixels that had both loss-and-gain + """ - uu.print_log("Loss and gain pixel processing using maxgain function:", tile_id) + uu.print_log(f'Loss and gain pixel processing using maxgain function: {tile_id}') # Names of the loss, gain and tree cover density tiles - loss, gain, model_extent = tile_names(tile_id, sensit_type) + loss, gain, model_extent = tile_names(tile_id) # start time start = datetime.datetime.now() @@ -230,38 +289,43 @@ def create_gain_year_count_loss_and_gain_maxgain(tile_id, sensit_type, no_upload uu.check_memory() if os.path.exists(loss): - uu.print_log(" Loss tile found for {}. Using it in loss and gain pixel gain year count".format(tile_id)) - loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*({}-1))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) - loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) + uu.print_log(f' Loss tile found for {tile_id}. Using it in loss-and-gain pixel gain year count') + loss_and_gain_calc = f'--calc=((A>0)*(B==1)*(C>0)*({cn.loss_years}-1))' + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' + loss_and_gain_outfilearg = f'--outfile={loss_and_gain_outfilename}' cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: - uu.print_log(" No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id)) + uu.print_log(f' Loss tile not found for {tile_id}. 
Skipping loss-and-gain pixel gain year count.') # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain', no_upload) + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') -# Merges the four gain year count tiles above to create a single gain year count tile -def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): +def create_gain_year_count_merge(tile_id, pattern): + """ + Merges the four gain year count tiles above to create a single gain year count tile + :param tile_id: tile to be processed, identified by its tile id + :param pattern: pattern for output tile names + :return: tile with number of years of carbon accumulation in all pixels + """ - uu.print_log("Merging loss, gain, no change, and loss/gain pixels into single gain year count raster for {}".format(tile_id)) + uu.print_log(f'Merging loss-only, gain-only, no-change, and loss/gain pixels into single gain year count raster for {tile_id}') # start time start = datetime.datetime.now() # The four rasters from above that are to be merged - no_change_gain_years = '{}_growth_years_no_change.tif'.format(tile_id) - loss_only_gain_years = '{}_growth_years_loss_only.tif'.format(tile_id) - gain_only_gain_years = '{}_growth_years_gain_only.tif'.format(tile_id) - loss_and_gain_gain_years = '{}_growth_years_loss_and_gain.tif'.format(tile_id) + no_change_gain_years = f'{tile_id}_gain_year_count_no_change.tif' + loss_only_gain_years = f'{tile_id}_gain_year_count_loss_only.tif' + gain_only_gain_years = f'{tile_id}_gain_year_count_gain_only.tif' + loss_and_gain_gain_years = f'{tile_id}_gain_year_count_loss_and_gain.tif' # Names of the output tiles - gain_year_count_merged = '{0}_{1}.tif'.format(tile_id, pattern) + gain_year_count_merged = uu.make_tile_name(tile_id, pattern) - # Opens no change gain year count tile. This should exist for all tiles. + # Opens no-change gain year count tile. This should exist for all tiles. with rasterio.open(no_change_gain_years) as no_change_gain_years_src: # Grabs metadata about the tif, like its location/projection/cellsize @@ -278,32 +342,32 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): nodata=0 ) - uu.print_log(" No change tile exists for {} by default".format(tile_id)) + uu.print_log(f' No-change tile exists for {tile_id} by default') # Opens the other gain year count tiles. They may not exist for all other tiles. 
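# Note: the loss-only, gain-only, no-change, and loss-and-gain rasters correspond to the four loss/gain combinations, so each pixel should be non-zero in at most one of them; a component tile that doesn't exist can therefore be treated as an all-zero window when the components are combined (see the np.zeros() fallbacks in the window loop below).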
try: loss_only_gain_years_src = rasterio.open(loss_only_gain_years) - uu.print_log(" Loss only tile found for {}".format(tile_id)) - except: - uu.print_log(" No loss only tile found for {}".format(tile_id)) + uu.print_log(f' Loss-only tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Loss-only tile not found for {tile_id}') try: gain_only_gain_years_src = rasterio.open(gain_only_gain_years) - uu.print_log(" Gain only tile found for {}".format(tile_id)) - except: - uu.print_log(" No gain only tile found for {}".format(tile_id)) + uu.print_log(f' Gain-only tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gain-only tile not found for {tile_id}') try: loss_and_gain_gain_years_src = rasterio.open(loss_and_gain_gain_years) - uu.print_log(" Loss and gain tile found for {}".format(tile_id)) - except: - uu.print_log(" No loss and gain tile found for {}".format(tile_id)) + uu.print_log(f' Loss-and-gain tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Loss-and-gain tile not found for {tile_id}') # Opens the output tile, giving it the arguments of the input tiles gain_year_count_merged_dst = rasterio.open(gain_year_count_merged, 'w', **kwargs) # Adds metadata tags to the output raster - uu.add_rasterio_tags(gain_year_count_merged_dst, sensit_type) + uu.add_universal_metadata_rasterio(gain_year_count_merged_dst) gain_year_count_merged_dst.update_tags( units='years') gain_year_count_merged_dst.update_tags( @@ -311,7 +375,7 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): gain_year_count_merged_dst.update_tags( max_possible_value=cn.loss_years) gain_year_count_merged_dst.update_tags( - source='Gain years are assigned based on the combination of Hansen loss and gain in each pixel. There are four combinations: neither loss nor gain, loss only, gain only, loss and gain.') + source='Gain years are assigned based on the combination of Hansen loss-and-gain in each pixel. 
There are four combinations: neither loss nor gain, loss-only, gain-only, loss-and-gain.') gain_year_count_merged_dst.update_tags( extent='Full model extent') @@ -324,17 +388,17 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): try: loss_only_gain_years_window = loss_only_gain_years_src.read(1, window=window) - except: + except UnboundLocalError: loss_only_gain_years_window = np.zeros((window.height, window.width), dtype='uint8') try: gain_only_gain_years_window = gain_only_gain_years_src.read(1, window=window) - except: + except UnboundLocalError: gain_only_gain_years_window = np.zeros((window.height, window.width), dtype='uint8') try: loss_and_gain_gain_years_window = loss_and_gain_gain_years_src.read(1, window=window) - except: + except UnboundLocalError: loss_and_gain_gain_years_window = np.zeros((window.height, window.width), dtype='uint8') @@ -344,4 +408,4 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): gain_year_count_merged_dst.write_band(1, gain_year_count_merged_window, window=window) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, pattern, no_upload) \ No newline at end of file + uu.end_of_fx_summary(start, tile_id, pattern) diff --git a/removals/gross_removals_all_forest_types.py b/removals/gross_removals_all_forest_types.py index 2c0b3eff..062f4ca5 100644 --- a/removals/gross_removals_all_forest_types.py +++ b/removals/gross_removals_all_forest_types.py @@ -1,48 +1,53 @@ +""" +Function to create gross removals tiles +""" + import datetime import rasterio -from subprocess import Popen, PIPE, STDOUT, check_call -import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -# Calculates cumulative aboveground carbon dioxide removals in mangroves -def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload): +def gross_removals_all_forest_types(tile_id, output_pattern_list): + """ + Calculates cumulative aboveground and belowground carbon dioxide removals for all forest types + :param tile_id: tile to be processed, identified by its tile id + :param output_pattern_list: pattern for output tile names + :return: 3 tiles: gross aboveground removals, belowground removals, aboveground+belowground removals + Units: Mg CO2/ha over entire model period.
+ """ - uu.print_log("Calculating cumulative CO2 removals:", tile_id) + uu.print_log(f'Calculating cumulative CO2 removals: {tile_id}') # Start time start = datetime.datetime.now() # Names of the input tiles, modified according to sensitivity analysis - gain_rate_AGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_AGC_all_types) - gain_rate_BGC = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_annual_gain_BGC_all_types) - gain_year_count = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_gain_year_count) + gain_rate_AGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_AGC_all_types) + gain_rate_BGC = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_annual_gain_BGC_all_types) + gain_year_count = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_gain_year_count) # Names of the output removal tiles - cumulative_gain_AGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0]) - cumulative_gain_BGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1]) - cumulative_gain_AGCO2_BGCO2 = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2]) + cumulative_gain_AGCO2 = f'{tile_id}_{output_pattern_list[0]}.tif' + cumulative_gain_BGCO2 = f'{tile_id}_{output_pattern_list[1]}.tif' + cumulative_gain_AGCO2_BGCO2 = f'{tile_id}_{output_pattern_list[2]}.tif' # Opens the input tiles if they exist. If one of the inputs doesn't exist, try: gain_rate_AGC_src = rasterio.open(gain_rate_AGC) - uu.print_log(" Aboveground removal factor tile found for", tile_id) - except: - uu.print_log(" No aboveground removal factor tile found for {}. Not creating gross removals.".format(tile_id)) - return + uu.print_log(f' Aboveground removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Aboveground removal factor tile not found for {tile_id}. Not creating gross removals.') try: gain_rate_BGC_src = rasterio.open(gain_rate_BGC) - uu.print_log(" Belowground removal factor tile found for", tile_id) - except: - uu.print_log(" No belowground removal factor tile found for {}. Not creating gross removals.".format(tile_id)) - return + uu.print_log(f' Belowground removal factor tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Belowground removal factor tile not found for {tile_id}. Not creating gross removals.') try: gain_year_count_src = rasterio.open(gain_year_count) - uu.print_log(" Gain year count tile found for", tile_id) - except: - uu.print_log(" No gain year count tile found for {}. Not creating gross removals.".format(tile_id)) - return + uu.print_log(f' Gain year count tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' Gain year count tile not found for {tile_id}. Not creating gross removals.') # Grabs metadata for an input tile @@ -61,7 +66,7 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n # The output files: aboveground gross removals, belowground gross removals, above+belowground gross removals. 
Adds metadata tags cumulative_gain_AGCO2_dst = rasterio.open(cumulative_gain_AGCO2, 'w', **kwargs) - uu.add_rasterio_tags(cumulative_gain_AGCO2_dst, sensit_type) + uu.add_universal_metadata_rasterio(cumulative_gain_AGCO2_dst) cumulative_gain_AGCO2_dst.update_tags( units='megagrams aboveground CO2/ha over entire model period') cumulative_gain_AGCO2_dst.update_tags( @@ -70,7 +75,7 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n extent='Full model extent') cumulative_gain_BGCO2_dst = rasterio.open(cumulative_gain_BGCO2, 'w', **kwargs) - uu.add_rasterio_tags(cumulative_gain_BGCO2_dst, sensit_type) + uu.add_universal_metadata_rasterio(cumulative_gain_BGCO2_dst) cumulative_gain_BGCO2_dst.update_tags( units='megagrams belowground CO2/ha over entire model period') cumulative_gain_BGCO2_dst.update_tags( @@ -108,4 +113,4 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload) + uu.end_of_fx_summary(start, tile_id, output_pattern_list[0]) diff --git a/removals/mp_US_removal_rates.py b/removals/mp_US_removal_rates.py index 4c445da0..e069f9a3 100644 --- a/removals/mp_US_removal_rates.py +++ b/removals/mp_US_removal_rates.py @@ -41,28 +41,28 @@ from functools import partial import datetime import argparse -import US_removal_rates import pandas as pd from subprocess import Popen, PIPE, STDOUT, check_call import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu +from . import US_removal_rates -def mp_US_removal_rates(sensit_type, tile_id_list, run_date): +def mp_US_removal_rates(tile_id_list): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script - download_dict = {cn.gain_dir: [cn.pattern_gain], + download_dict = {cn.gain_dir: [cn.pattern_gain_data_lake], cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed], cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed], cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US] @@ -77,24 +77,24 @@ def mp_US_removal_rates(sensit_type, tile_id_list, run_date): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full 
model script or a run of this script. # This replaces the date in constants_and_names. - if run_date is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Table with US-specific removal rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) @@ -216,25 +216,31 @@ def mp_US_removal_rates(sensit_type, tile_id_list, run_date): parser = argparse.ArgumentParser( description='Create tiles of removal factors for the US using US rates') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + tile_id_list = args.tile_id_list - run_date = args.run_date # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_US_removal_rates(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file + mp_US_removal_rates(tile_id_list) \ No newline at end of file diff --git a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py index 55378085..f4ea3b21 100644 --- a/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/removals/mp_annual_gain_rate_AGC_BGC_all_forest_types.py @@ -1,4 +1,4 @@ -''' +""" Creates tiles of annual aboveground and belowground removal rates for the entire model extent (all forest types). Also, creates tiles that show what the source of the removal factor is each for each pixel. This can correspond to particular forest types (mangrove, planted, natural) or data sources (US, Europe, young natural forests from Cook-Patton et al., @@ -7,34 +7,40 @@ rates for young secondary forests > IPCC defaults for old secondary and primary forests. This hierarchy is reflected in the removal rates and the forest type rasters. The different removal rate inputs are in different units but all are standardized to AGC/ha/yr and BGC/ha/yr. 
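A minimal sketch of the argument-handling pattern these __main__ blocks now share: command-line options are parsed once and stored as globals on the constants module, so worker functions no longer take sensit_type/run_date/no_upload parameters. A SimpleNamespace stands in for constants_and_names here; the attribute names mirror the ones set above.

import argparse
from types import SimpleNamespace

# Stand-in for constants_and_names; the real module holds these globals
cn = SimpleNamespace(SENSIT_TYPE='std', RUN_DATE=None, NO_UPLOAD=False)

parser = argparse.ArgumentParser(description='Store CLI options as module-level globals')
parser.add_argument('--model-type', '-t', default='std')
parser.add_argument('--run-date', '-d', required=False)
parser.add_argument('--no-upload', '-nu', action='store_true')
args = parser.parse_args()

# Sets global variables to the command line arguments
cn.SENSIT_TYPE = args.model_type
cn.RUN_DATE = args.run_date
cn.NO_UPLOAD = args.no_upload

# Any later function can simply read cn.SENSIT_TYPE etc. without extra parameters
print(cn.SENSIT_TYPE, cn.RUN_DATE, cn.NO_UPLOAD)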
-''' + +python -m removals.mp_annual_gain_rate_AGC_BGC_all_forest_types -t std -l 00N_000E -nu +python -m removals.mp_annual_gain_rate_AGC_BGC_all_forest_types -t std -l all +""" -import multiprocessing -from functools import partial -import pandas as pd -import datetime import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +from functools import partial +import multiprocessing import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'removals')) -import annual_gain_rate_AGC_BGC_all_forest_types +from . import annual_gain_rate_AGC_BGC_all_forest_types -def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 5 sets of tiles with annual removal factors combined from all removal factor sources: + removal forest type, aboveground rate, belowground rate, aboveground+belowground rate, + standard deviation for aboveground rate. + Units: Mg carbon/ha/yr (including for standard deviation tiles) + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. @@ -48,6 +54,7 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young], cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio], cn.stdev_annual_gain_AGB_mangrove_dir: [cn.pattern_stdev_annual_gain_AGB_mangrove], cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe], @@ -69,69 +76,70 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. 
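A minimal sketch of the download step shared by these scripts: a dictionary maps each s3 directory to a one-element list holding its filename pattern, and each pair is handed to the flexible downloader. The directory names and the stub function below are hypothetical; only the bucket prefix follows the gfw2-data convention used elsewhere in this repo.

# Hypothetical s3 directories and tile patterns; the real values live in constants_and_names
download_dict = {
    's3://gfw2-data/climate/carbon_model/model_extent/': ['model_extent'],
    's3://gfw2-data/climate/carbon_model/gain/': ['gain_data_lake'],
}

def s3_flexible_download(directory, pattern, local_dir, sensit_type, tile_ids):
    # Stub standing in for uu.s3_flexible_download, which downloads either
    # individual tiles or the whole directory depending on the tile list
    print(f'would download {pattern} tiles from {directory} to {local_dir} ({sensit_type}, {len(tile_ids)} tiles)')

for directory, values in download_dict.items():
    pattern = values[0]
    s3_flexible_download(directory, pattern, '/usr/local/tiles', 'std', ['00N_000E'])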
# This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 13 - else: - processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list) + else: - processes = 2 - uu.print_log('Removal factor processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, - output_pattern_list=output_pattern_list, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type, no_upload) + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 13 + else: + processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak + else: + processes = 2 + uu.print_log(f'Removal factor processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() + + # No single-processor versions of these check-if-empty functions # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( - output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: - processes = 55 # 50 processors = XXX GB peak - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + processes = 55 # 55 processors = XXX GB peak + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + with 
multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -141,30 +149,35 @@ def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_ parser = argparse.ArgumentParser( description='Create tiles of removal factors for all forest types') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, - run_date=run_date, no_upload=no_upload) - + mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list) diff --git a/removals/mp_annual_gain_rate_IPCC_defaults.py b/removals/mp_annual_gain_rate_IPCC_defaults.py index dc33b6f6..5f585995 100644 --- a/removals/mp_annual_gain_rate_IPCC_defaults.py +++ b/removals/mp_annual_gain_rate_IPCC_defaults.py @@ -1,55 +1,60 @@ -''' -This script assigns annual aboveground and belowground removal rates for the full model extent according to IPCC Table 4.9 defaults -(in the units of IPCC Table 4.9 (currently tonnes biomass/ha/yr)) to the entire model extent. +""" It also creates assigns aboveground removal rate standard deviations for the full model extent according to IPCC Table 4.9 defaults (in the units of IPCC Table 4.9 (currently tonnes biomass/ha/yr)) to the entire model extent. The standard deviation tiles are used in the uncertainty analysis. It requires IPCC Table 4.9, formatted for easy ingestion by pandas. 
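A minimal sketch of the multiprocessing configuration these scripts keep referencing (the spencerimp example): functools.partial freezes the keyword arguments, and pool.map supplies each tile id as the remaining positional argument. The worker and its arguments here are toy stand-ins.

import multiprocessing
from functools import partial

def process_tile(tile_id, output_pattern_list, gain_table_dict):
    # Toy stand-in for a per-tile worker such as the annual_gain_rate functions
    return f'{tile_id}: {output_pattern_list[0]}, {len(gain_table_dict)} rates'

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E']
    with multiprocessing.Pool(2) as pool:
        results = pool.map(
            partial(process_tile,
                    output_pattern_list=['annual_gain_rate_AGC'],
                    gain_table_dict={1101.0: 2.5}),
            tile_id_list)
    print(results)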
Essentially, this does some processing of the IPCC removals rate table, then uses it as a dictionary that it applies to every pixel in every tile. -Each continent-ecozone-forest age category combination gets its own code, which matches the codes in the +Each continent-ecozone-forest age category combination gets its own code, which matches the codes in the processed IPCC table. The extent of these removal rates is greater than what is ultimately used in the model because it assigns IPCC defaults everywhere there's a forest age category, continent, and ecozone. You can think of this as the IPCC default rate that would be applied if no other data were available for that pixel. The belowground removal rates are purely the aboveground removal rates with the above:below ratio applied to them. -''' + +python -m removals.mp_annual_gain_rate_IPCC_defaults -t std -l 00N_000E -nu +python -m removals.mp_annual_gain_rate_IPCC_defaults -t std -l all +""" import multiprocessing from functools import partial import argparse import pandas as pd -import datetime -from subprocess import Popen, PIPE, STDOUT, check_call import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'removals')) -import annual_gain_rate_IPCC_defaults +from . import annual_gain_rate_IPCC_defaults -os.chdir(cn.docker_base_dir) +os.chdir(cn.docker_tile_dir) -def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_annual_gain_rate_IPCC_defaults(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: set of tiles with annual removal factors according to IPCC Volume 4 Table 4.9: + aboveground rate, belowground rate, standard deviation for aboveground rate. + Units: Mg biomass/ha/yr (including for standard deviation tiles) + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) pd.options.mode.chained_assignment = None # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. download_dict = { cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], - cn.cont_eco_dir: [cn.pattern_cont_eco_processed] + cn.cont_eco_dir: [cn.pattern_cont_eco_processed], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio] } @@ -59,50 +64,49 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script.
# This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # Table with IPCC Table 4.9 default removals rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) ### To make the removal factor dictionaries # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, no_prim_gain") - uu.print_log("Using no_primary_gain IPCC default rates for tile creation") + gain_table = pd.read_excel(cn.gain_spreadsheet, sheet_name = "natrl fores gain, no_prim_gain") + uu.print_log('Using no_primary_gain IPCC default rates for tile creation') # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, for std model") + gain_table = pd.read_excel(cn.gain_spreadsheet, sheet_name = "natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts removals table from wide to long, so each continent-ecozone-age category has its own row - gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars = ['gainEcoCon'], value_vars = ['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20']) + gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars = ['gainEcoCon'], + value_vars = ['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20']) gain_table_cont_eco_age = gain_table_cont_eco_age.dropna() # Creates a table that has just the continent-ecozone combinations for adding to the dictionary. 
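A minimal sketch of the table processing described above and of how the resulting dictionary is applied per pixel: a toy wide table is melted to one row per continent-ecozone-age combination, turned into a {code: rate} dictionary, and looked up against a small code array. The combined-code scheme and values here are invented for illustration; the real script builds its codes from the full IPCC Table 4.9 worksheet.

import numpy as np
import pandas as pd

# Toy stand-in for the IPCC Table 4.9 worksheet
gain_table_simplified = pd.DataFrame({
    'gainEcoCon': [1101, 1201],
    'growth_primary': [0.5, 0.9],
    'growth_secondary_greater_20': [1.2, 2.0],
    'growth_secondary_less_20': [3.1, 5.4],
})

# Wide to long: one row per continent-ecozone-age category combination
gain_table_cont_eco_age = pd.melt(
    gain_table_simplified, id_vars=['gainEcoCon'],
    value_vars=['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20']).dropna()

# Toy combined codes: ecozone-continent code plus an offset per age category
age_offset = {'growth_primary': 0, 'growth_secondary_greater_20': 10000, 'growth_secondary_less_20': 20000}
gain_table_cont_eco_age['cont_eco_age'] = (
    gain_table_cont_eco_age['gainEcoCon'] + gain_table_cont_eco_age['variable'].map(age_offset))
gain_table_dict = {float(k): v for k, v in
                   zip(gain_table_cont_eco_age['cont_eco_age'], gain_table_cont_eco_age['value'])}

# Applying the dictionary to every pixel of a small continent-ecozone-age code array
cont_eco_age = np.array([[1101, 21201], [11101, 0]], dtype='float32')
gain_rate = np.vectorize(lambda code: gain_table_dict.get(float(code), 0.0))(cont_eco_age)
print(gain_rate)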
@@ -141,17 +145,15 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None ### To make the removal factor standard deviation dictionary # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 - if sensit_type == 'no_primary_gain': + if cn.SENSIT_TYPE == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon removals rates - stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name="natrl fores stdv, no_prim_gain") - uu.print_log("Using no_primary_gain IPCC default standard deviations for tile creation") + stdev_table = pd.read_excel(cn.gain_spreadsheet, sheet_name="natrl fores stdv, no_prim_gain") + uu.print_log('Using no_primary_gain IPCC default standard deviations for tile creation') # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass removals rate standard deviations - stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name="natrl fores stdv, for std model") + stdev_table = pd.read_excel(cn.gain_spreadsheet, sheet_name="natrl fores stdv, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first') @@ -193,36 +195,34 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None # Converts all the keys (continent-ecozone-age codes) to float type stdev_table_dict = {float(key): value for key, value in stdev_table_dict.items()} + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, gain_table_dict, stdev_table_dict, output_pattern_list) # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': processes = 24 # 24 processors = 590 GB peak else: - processes = 30 # 30 processors = 725 GB peak + processes = 23 # 30 processors>=740 GB peak; 25>=740 GB peak (too high); 20>=740 GB peak (risky); + # 16>=740 GB peak; 14=420 GB peak; 17=520 GB peak; 20=610 GB peak; 23=690 GB peak; 25=>740 GB peak else: processes = 2 - uu.print_log('Annual removals rate natural forest max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_IPCC_defaults.annual_gain_rate, sensit_type=sensit_type, - gain_table_dict=gain_table_dict, stdev_table_dict=stdev_table_dict, - output_pattern_list=output_pattern_list, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # - # annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type, - # gain_table_dict, stdev_table_dict, output_pattern_list, no_upload) + uu.print_log(f'Annual removals rate natural forest max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(annual_gain_rate_IPCC_defaults.annual_gain_rate, + gain_table_dict=gain_table_dict, stdev_table_dict=stdev_table_dict, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - for i in range(0, 
len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + if not cn.NO_UPLOAD: + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if __name__ == '__main__': @@ -232,28 +232,35 @@ def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date = None parser = argparse.ArgumentParser( description='Create tiles of removal factors according to IPCC defaults') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_IPCC_defaults(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_IPCC_defaults(tile_id_list) diff --git a/removals/mp_annual_gain_rate_mangrove.py b/removals/mp_annual_gain_rate_mangrove.py index 035cbbab..7f621403 100644 --- a/removals/mp_annual_gain_rate_mangrove.py +++ b/removals/mp_annual_gain_rate_mangrove.py @@ -3,6 +3,9 @@ Its inputs are the continent-ecozone tiles, mangrove biomass tiles (for locations of mangroves), and the IPCC mangrove removals rate table. Also creates tiles of standard deviation in mangrove aboveground biomass removal rates based on the 95% CI in IPCC Wetlands Supplement Table 4.4. + +python -m removals.mp_annual_gain_rate_mangrove -t std -l 00N_000E -nu +python -m removals.mp_annual_gain_rate_mangrove -t std -l all ''' import multiprocessing @@ -13,15 +16,13 @@ from subprocess import Popen, PIPE, STDOUT, check_call import os import sys -sys.path.append('../') import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'removals')) -import annual_gain_rate_mangrove +from . 
import annual_gain_rate_mangrove -def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): +def mp_annual_gain_rate_mangrove(tile_id_list): - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) pd.options.mode.chained_assignment = None @@ -34,7 +35,7 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): tile_id_list = list(set(mangrove_biomass_tile_list).intersection(ecozone_tile_list)) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") download_dict = { @@ -49,20 +50,20 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. - if run_date is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) @@ -119,30 +120,29 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): stdev_dict = {float(key): value for key, value in stdev_dict.items()} - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - # Ran with 18 processors on r4.16xlarge (430 GB memory peak) - if cn.count == 96: - processes = 20 #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak - else: - processes = 4 - uu.print_log('Mangrove annual removals rate max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, sensit_type=sensit_type, output_pattern_list=output_pattern_list, - gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), tile_id_list) - pool.close() - pool.join() + if cn.SINGLE_PROCESSOR: + for tile in tile_id_list: + annual_gain_rate_mangrove.annual_gain_rate(tile, output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict) - # # For single processor use - # for tile in tile_id_list: - # - # annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list, - # gain_above_dict, gain_below_dict, stdev_dict) + else: + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + 
# Ran with 18 processors on r4.16xlarge (430 GB memory peak) + if cn.count == 96: + processes = 20 #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak + else: + processes = 4 + uu.print_log('Mangrove annual removals rate max processors=', processes) + pool = multiprocessing.Pool(processes) + pool.map(partial(annual_gain_rate_mangrove.annual_gain_rate, output_pattern_list=output_pattern_list, + gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), + tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: - for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) @@ -154,26 +154,34 @@ def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = None): parser = argparse.ArgumentParser( description='Create tiles of removal factors for mangrove forests') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') + parser.add_argument('--no-upload', '-nu', action='store_true', + help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True - + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_annual_gain_rate_mangrove(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date) \ No newline at end of file + mp_annual_gain_rate_mangrove(tile_id_list) \ No newline at end of file diff --git a/removals/mp_forest_age_category_IPCC.py b/removals/mp_forest_age_category_IPCC.py index c90203d9..6cb2571a 100644 --- a/removals/mp_forest_age_category_IPCC.py +++ b/removals/mp_forest_age_category_IPCC.py @@ -1,4 +1,4 @@ -''' +""" This script creates tiles of forest age category across the entire model extent (all pixels) according to a decision tree. The age categories are: <= 20 year old secondary forest (1), >20 year old secondary forest (2), and primary forest (3). The decision tree is implemented as a series of numpy array statements rather than as nested if statements or gdal_calc operations. @@ -9,54 +9,58 @@ This assigns forest age category to all pixels within the model but they are ultimately only used for non-mangrove, non-planted, non-European, non-US, older secondary and primary forest pixels.
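A minimal sketch of implementing a decision tree like the one described above as numpy array statements instead of nested if statements; the input layers, thresholds, and ordering here are toy values, not the model's actual rules.

import numpy as np

# Toy input layers (1 = condition true, 0 = false)
model_extent = np.array([[1, 1], [1, 0]])
primary_or_ifl = np.array([[0, 1], [0, 0]])
loss_before_2020 = np.array([[1, 0], [0, 0]])

# 1 = young secondary (<=20 yr), 2 = older secondary, 3 = primary forest
age_category = np.zeros_like(model_extent)
age_category = np.where(model_extent == 1, 2, age_category)                               # default: older secondary
age_category = np.where((model_extent == 1) & (loss_before_2020 == 1), 1, age_category)   # recent loss -> young
age_category = np.where((model_extent == 1) & (primary_or_ifl == 1), 3, age_category)     # primary/IFL takes precedence
print(age_category)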
You can think of the output from this script as being the age category if IPCC Table 4.9 rates were to be applied there. -''' +python -m removals.mp_forest_age_category_IPCC -t std -l 00N_000E -nu +python -m removals.mp_forest_age_category_IPCC -t std -l all +""" -import multiprocessing + +import argparse from functools import partial import pandas as pd -import datetime -import argparse -from subprocess import Popen, PIPE, STDOUT, check_call +import multiprocessing import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'removals')) -import forest_age_category_IPCC +from . import forest_age_category_IPCC -def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_forest_age_category_IPCC(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: set of tiles denoting three broad forest age categories: 1- young (<20), 2- middle, 3- old/primary + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. download_dict = { cn.model_extent_dir: [cn.pattern_model_extent], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary], cn.cont_eco_dir: [cn.pattern_cont_eco_processed] } # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Adds the correct biomass tile to the download dictionary depending on the model run - if sensit_type == 'biomass_swap': + if cn.SENSIT_TYPE == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] @@ -69,33 +73,32 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing 
output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Table with IPCC Table 4.9 default removals rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] uu.log_subprocess_output_full(cmd) # Imports the table with the ecozone-continent codes and the carbon removals rates - gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), - sheet_name = "natrl fores gain, for std model") + gain_table = pd.read_excel(f'{cn.gain_spreadsheet}', sheet_name = "natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') @@ -110,33 +113,31 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function - # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html - # With processes=30, peak usage was about 350 GB using WHRC AGB. - # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 32 # 32 processors = 610 GB peak - else: - processes = 42 # 30 processors=460 GB peak; 36 = 550 GB peak; 40 = XXX GB peak - else: - processes = 2 - uu.print_log('Natural forest age category max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, - pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # - # forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_upload) + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + forest_age_category_IPCC.forest_age_category(tile_id, gain_table_dict, pattern) + else: + # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function + # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html + # With processes=30, peak usage was about 350 GB using WHRC AGB. + # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. 
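A minimal sketch of the cn.SINGLE_PROCESSOR switch added above: the same worker is either looped over directly (easier debugging) or fanned out through a pool whose size depends on the machine. The names and process counts below are simplified stand-ins.

import multiprocessing
from functools import partial

SINGLE_PROCESSOR = True   # would be cn.SINGLE_PROCESSOR
count = 8                 # would be cn.count (processors on the machine)

def forest_age_category(tile_id, gain_table_dict, pattern):
    # Toy stand-in for the per-tile worker
    return f'{tile_id}_{pattern}'

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E']
    gain_table_dict = {1101.0: 2.5}
    pattern = 'forest_age_category'

    if SINGLE_PROCESSOR:
        for tile_id in tile_id_list:
            forest_age_category(tile_id, gain_table_dict, pattern)
    else:
        processes = 32 if count == 96 else 2  # tuned to peak memory, as in the comments above
        with multiprocessing.Pool(processes) as pool:
            pool.map(partial(forest_age_category, gain_table_dict=gain_table_dict, pattern=pattern),
                     tile_id_list)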
+ if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 32 # 32 processors = 610 GB peak + else: + processes = 44 # 30 processors=460 GB peak; 36 = 550 GB peak; 42 = 700 GB peak (slow increase later on); 44=725 GB peak + else: + processes = 2 + uu.print_log(f'Natural forest age category max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(forest_age_category_IPCC.forest_age_category, gain_table_dict=gain_table_dict, pattern=pattern), + tile_id_list) + pool.close() + pool.join() # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - + if not cn.NO_UPLOAD: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -147,29 +148,34 @@ def mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date = None, no_u parser = argparse.ArgumentParser( description='Create tiles of the forest age category (<20 years, >20 years secondary, primary)') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_forest_age_category_IPCC(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) - + mp_forest_age_category_IPCC(tile_id_list) diff --git a/removals/mp_gain_year_count_all_forest_types.py b/removals/mp_gain_year_count_all_forest_types.py index 6638be58..63f7e392 100644 --- a/removals/mp_gain_year_count_all_forest_types.py +++ b/removals/mp_gain_year_count_all_forest_types.py @@ -1,185 +1,200 @@ -''' +""" Creates tiles of the number of years in which carbon removals occur during the model duration (2001 to 2020 currently). It is based on the annual Hansen loss data and the 2000-2012 Hansen gain data. -First it separately calculates rasters of gain years for model pixels that had loss only, -gain only, neither loss nor gain, and both loss and gain. +First it separately calculates rasters of gain years for model pixels that had loss-only, +gain-only, neither loss nor gain, and both loss-and-gain. 
The gain years for each of these conditions are calculated according to rules that are found in the function called by the multiprocessor commands. The same gain year count rules are applied to all types of forest (mangrove, planted, etc.). Then it combines those four rasters into a single gain year raster for each tile using rasterio because summing the arrays using rasterio is faster and uses less memory than combining them with gdalmerge. If different input rasters for loss (e.g., 2001-2017) and gain (e.g., 2000-2018) are used, the year count constants in constants_and_names.py must be changed. -''' -import multiprocessing +python -m removals.mp_gain_year_count_all_forest_types -t std -l 00N_000E -nu +python -m removals.mp_gain_year_count_all_forest_types -t std -l all +""" + import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys -import gain_year_count_all_forest_types -sys.path.append('../') + import constants_and_names as cn import universal_util as uu +from . import gain_year_count_all_forest_types -def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = None): +def mp_gain_year_count_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 5 sets of tiles that show the estimated years of carbon accumulation. + The only one used later in the model is the combined one. The other four are for QC. + Units: years. + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # No point in making gain year count tiles for tiles that don't have annual removals - tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type) + tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, cn.SENSIT_TYPE) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be # changed for a sensitivity analysis. This does not need to change based on what run is being done; # this assignment should be true for all sensitivity analyses and the standard model. 
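A minimal sketch of the merge described above: because every pixel falls into exactly one of the four loss/gain conditions, the combined gain-year count is just the element-wise sum of the four arrays (toy windows stand in for the rasterio reads).

import numpy as np

# Toy 2x2 windows for the loss-only, gain-only, no-change, and loss-and-gain rasters;
# each pixel is nonzero in at most one of them
loss_only = np.array([[3, 0], [0, 0]], dtype='uint8')
gain_only = np.array([[0, 6], [0, 0]], dtype='uint8')
no_change = np.array([[0, 0], [20, 0]], dtype='uint8')
loss_and_gain = np.array([[0, 0], [0, 9]], dtype='uint8')

gain_year_count = loss_only + gain_only + no_change + loss_and_gain
print(gain_year_count)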
download_dict = { - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], cn.model_extent_dir: [cn.pattern_model_extent] } - + # Adds the correct loss tile to the download dictionary depending on the model run - if sensit_type == 'legal_Amazon_loss': + if cn.SENSIT_TYPE == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] - elif sensit_type == 'Mekong_loss': + elif cn.SENSIT_TYPE == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] - - + + output_dir_list = [cn.gain_year_count_dir] output_pattern_list = [cn.pattern_gain_year_count] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. - if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] - # Creates gain year count tiles using only pixels that had only loss - if cn.count == 96: - processes = 90 # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak - else: - processes = int(cn.count/2) - uu.print_log('Gain year count loss only pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - - if cn.count == 96: - processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak - else: - processes = int(cn.count/2) - uu.print_log('Gain year count gain only pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'maxgain': - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - if sensit_type == 'legal_Amazon_loss': - uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.") - else: - # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - # Creates gain year count tiles using only pixels that had neither loss nor gain pixels - if cn.count == 96: - processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak - else: - processes = int(cn.count/2) - uu.print_log('Gain year count no change pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'legal_Amazon_loss': - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - else: - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) + if cn.SINGLE_PROCESSOR: + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id) + + for tile_id in tile_id_list: + if cn.SENSIT_TYPE == 'maxgain': + gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id) + else: + gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id) + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id) + + for tile_id in tile_id_list: + if cn.SENSIT_TYPE == 'maxgain': + gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id) + else: + gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id) + + for tile_id in tile_id_list: + gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern) - if cn.count == 96: - processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak else: - processes = int(cn.count/2) - uu.print_log('Gain year count loss & gain pixels max processors=', processes) - pool = multiprocessing.Pool(processes) - if sensit_type == 'maxgain': + + # Creates gain year count tiles using only pixels that had only loss + if cn.count == 96: + processes = 70 # 90>=740 GB peak; 70=610 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count loss-only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only), + tile_id_list) + pool.close() + pool.join() + # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - else: + if cn.count == 96: + processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count gain-only pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain), + tile_id_list) + elif cn.SENSIT_TYPE == 'legal_Amazon_loss': + uu.print_log('Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.') + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard), + tile_id_list) + pool.close() + pool.join() + + # Creates gain year count tiles using only pixels that had neither loss nor gain pixels + if cn.count == 96: + processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count no-change pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'legal_Amazon_loss': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss), + tile_id_list) + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard), + tile_id_list) + pool.close() + pool.join() + # Creates gain year count tiles using only pixels that had only gain - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - - # Combines the four above gain year count tiles for each Hansen tile into a single output tile - if cn.count == 96: - processes = 84 # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak - elif cn.count < 4: - processes = 1 - else: - processes = int(cn.count/4) - uu.print_log('Gain year count gain merge all combos max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, - pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - - # # For single processor use - # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload) - # - # for tile_id in tile_id_list: - # if sensit_type == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload) - # else: - # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload) - # - # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload) - # - # for tile_id in tile_id_list: - # if sensit_type == 'maxgain': - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload) - # else: - # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload) - # - # for tile_id in tile_id_list: - # gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload) - - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: - - print("in upload area") + if cn.count == 96: + processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak + else: + processes = int(cn.count/2) + uu.print_log(f'Gain year count loss & gain pixels max processors={processes}') + with multiprocessing.Pool(processes) as pool: + if cn.SENSIT_TYPE == 'maxgain': + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain), + tile_id_list) + else: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard), + tile_id_list) + pool.close() + pool.join() + + # Combines the four above gain year count tiles for each Hansen tile into a single output tile + if cn.count == 96: + processes = 84 # 28 
processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = 630 GB peak + elif cn.count < 4: + processes = 1 + else: + processes = int(cn.count/4) + uu.print_log(f'Gain year count gain merge all combos max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern), + tile_id_list) + pool.close() + pool.join() + + + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: # Intermediate output tiles for checking outputs - uu.upload_final_set(output_dir_list[0], "growth_years_loss_only") - uu.upload_final_set(output_dir_list[0], "growth_years_gain_only") - uu.upload_final_set(output_dir_list[0], "growth_years_no_change") - uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain") + uu.upload_final_set(output_dir_list[0], "gain_year_count_loss_only") + uu.upload_final_set(output_dir_list[0], "gain_year_count_gain_only") + uu.upload_final_set(output_dir_list[0], "gain_year_count_no_change") + uu.upload_final_set(output_dir_list[0], "gain_year_count_loss_and_gain") # This is the final output used later in the model uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) @@ -192,28 +207,34 @@ def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = No parser = argparse.ArgumentParser( description='Create tiles of number of years in which removals occurred during the model period') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. 
Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_gain_year_count_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_gain_year_count_all_forest_types(tile_id_list) diff --git a/removals/mp_gross_removals_all_forest_types.py b/removals/mp_gross_removals_all_forest_types.py index ceb89545..c281e70e 100644 --- a/removals/mp_gross_removals_all_forest_types.py +++ b/removals/mp_gross_removals_all_forest_types.py @@ -1,39 +1,45 @@ -''' +""" This script calculates the cumulative above and belowground carbon dioxide removals (removals) for all forest types for the duration of the model. It multiplies the annual aboveground and belowground carbon removal factors by the number of years of removals and the C to CO2 conversion. It then sums the aboveground and belowground gross removals to get gross removals for all forest types in both emitted_pools. That is the final gross removals for the entire model. Note that gross removals from this script are reported as positive values. -''' -import multiprocessing +python -m removals.mp_gross_removals_all_forest_types -t std -l 00N_000E -nu +python -m removals.mp_gross_removals_all_forest_types -t std -l all +""" + import argparse -import os -import datetime from functools import partial +import multiprocessing +import os import sys -sys.path.append('../') + import constants_and_names as cn import universal_util as uu -sys.path.append(os.path.join(cn.docker_app,'removals')) -import gross_removals_all_forest_types +from . import gross_removals_all_forest_types -def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = True): +def mp_gross_removals_all_forest_types(tile_id_list): + """ + :param tile_id_list: list of tile ids to process + :return: 3 sets of tiles: gross aboveground removals, belowground removals, aboveground+belowground removals + Units: Mg CO2/ha over entire model period. 
+ """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model - # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) - gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, sensit_type=sensit_type) - annual_removals_tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type) + # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, cn.SENSIT_TYPE) + gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, cn.SENSIT_TYPE) + annual_removals_tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, cn.SENSIT_TYPE) tile_id_list = list(set(gain_year_count_tile_id_list).intersection(annual_removals_tile_id_list)) - uu.print_log("Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:") + uu.print_log('Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:') uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Files to download for this script. @@ -51,67 +57,66 @@ def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = Non # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): - dir = key + directory = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(directory, pattern, cn.docker_tile_dir, cn.SENSIT_TYPE, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) - output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) + output_pattern_list = uu.alter_patterns(cn.SENSIT_TYPE, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) - # Calculates gross removals - if cn.count == 96: - if sensit_type == 'biomass_swap': - processes = 18 - else: - processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak + if cn.SINGLE_PROCESSOR: + for tile_id in tile_id_list: + gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list) + else: - processes = 2 - uu.print_log('Gross removals max processors=', processes) - pool = multiprocessing.Pool(processes) - pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, output_pattern_list=output_pattern_list, - sensit_type=sensit_type, no_upload=no_upload), tile_id_list) - pool.close() - pool.join() - - # # For single processor use - # for tile_id in tile_id_list: - # gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload) + if cn.count == 96: + if cn.SENSIT_TYPE == 'biomass_swap': + processes = 18 + else: + processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak + else: + processes = 2 + uu.print_log(f'Gross removals max processors={processes}') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types, + output_pattern_list=output_pattern_list), + tile_id_list) + pool.close() + pool.join() + # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 - uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format( - output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors using light function...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() else: processes = 55 # 55 processors = 670 GB peak - uu.print_log( - "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes)) - pool = multiprocessing.Pool(processes) - pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) - pool.close() - pool.join() - - # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + uu.print_log(f'Checking for empty tiles of {output_pattern} pattern with {processes} processors...') + with multiprocessing.Pool(processes) as pool: + pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) + pool.close() + pool.join() - for i in range(0, len(output_dir_list)): - uu.upload_final_set(output_dir_list[i], output_pattern_list[i]) + # If cn.NO_UPLOAD flag is not activated (by choice or by lack of AWS credentials), output is uploaded + if not cn.NO_UPLOAD: + for output_dir, output_pattern in zip(output_dir_list, output_pattern_list): + uu.upload_final_set(output_dir, output_pattern) if 
__name__ == '__main__': @@ -121,28 +126,34 @@ def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date = Non parser = argparse.ArgumentParser( description='Create tiles of gross removals over the model period') parser.add_argument('--model-type', '-t', required=True, - help='{}'.format(cn.model_type_arg_help)) + help=f'{cn.model_type_arg_help}') parser.add_argument('--tile_id_list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') args = parser.parse_args() - sensit_type = args.model_type + + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.RUN_DATE = args.run_date + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + tile_id_list = args.tile_id_list - run_date = args.run_date - no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): - no_upload = True + cn.NO_UPLOAD = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload) + uu.initiate_log(tile_id_list) # Checks whether the sensitivity analysis and tile_id_list arguments are valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) tile_id_list = uu.tile_id_list_check(tile_id_list) - mp_gross_removals_all_forest_types(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload) \ No newline at end of file + mp_gross_removals_all_forest_types(tile_id_list) diff --git a/requirements.txt b/requirements.txt index d1baa6e6..46b907a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,19 @@ -cftime~=1.4.1 -awscli~=1.16.50 -boto3~=1.9.40 -botocore~=1.12.40 -netCDF4~=1.4.2 -numpy~=1.15.4 -pandas~=0.23.4 -psycopg2~=2.7.4 -rasterio~=1.1.5 -scipy~=1.1.0 -simpledbf~=0.2.6 -virtualenv~=16.0.0 -xlrd~=1.1.0 -psutil +awscli==1.25.58 +boto3==1.24.57 +botocore==1.27.57 +cftime==1.6.1 +memory_profiler==0.61.0 +netCDF4==1.6.0 +numpy>=1.18.5 +openpyxl==3.0.10 +pandas==1.4.3 +psutil==5.9.1 +psycopg2==2.9.3 +pylint==2.14.5 +pytest==7.1.2 +rasterio==1.3.2 +rioxarray==0.13.3 +scipy==1.9.0 +simpledbf==0.2.6 +virtualenv==20.16.3 + diff --git a/run_full_model.py b/run_full_model.py index f10b4099..852142b1 100644 --- a/run_full_model.py +++ b/run_full_model.py @@ -1,28 +1,54 @@ -''' -Clone repositoroy: +""" +Clone repository: git clone https://github.com/wri/carbon-budget Create spot machine using spotutil: spotutil new r5d.24xlarge dgibbs_wri -Compile C++ emissions modulte (for standard model and sensitivity analyses that using standard emissions model) -c++ /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.cpp -o /usr/local/app/emissions/cpp_util/calc_gross_emissions_generic.exe -lgdal - -Run 00N_000E in standard model; save intermediate outputs; do upload outputs to s3; run all model stages; -starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil -python run_full_model.py -si -t std -s all -r -d 20229999 -l 00N_000E -ce loss -p biomass_soil -tcd 30 -ln "00N_000E test" - -FULL STANDARD MODEL RUN: 
Run all tiles in standard model; save intermediate outputs; do upload outputs to s3; -run all model stages; starting from the beginning; get carbon pools at time of loss; emissions from biomass and soil -python run_full_model.py -si -t std -s all -r -l all -ce loss -p biomass_soil -tcd 30 -ln "Running all tiles" - -''' +Build Docker container: +docker build . -t gfw/carbon-budget + +Enter Docker container: +docker run --rm -it -e AWS_SECRET_ACCESS_KEY=[] -e AWS_ACCESS_KEY_ID=[] gfw/carbon-budget + +Run: standard model; save intermediate outputs; run model from annual_removals_IPCC; +run 00N_000E; get carbon pools at time of loss; add a log note; +do not upload outputs to s3; use multiprocessing (implicit because no -sp flag); +only run listed stage (implicit because no -r flag) +python -m run_full_model -t std -si -s annual_removals_IPCC -nu -l 00N_000E -ce loss -ln "00N_000E test" + +Run: standard model; save intermediate outputs; run model from annual_removals_IPCC; run all subsequent model stages; +do not upload outputs to s3; run 00N_000E; get carbon pools at time of loss; add a log note; +use multiprocessing (implicit because no -sp flag) +python -m run_full_model -t std -si -s annual_removals_IPCC -r -nu -l 00N_000E -ce loss -ln "00N_000E test" + +Run: standard model; save intermediate outputs; run model from the beginning; run all model stages; +upload to folder with date 20239999; run 00N_000E; get carbon pools at time of loss; add a log note; +upload outputs to s3 (implicit because no -nu flag); use multiprocessing (implicit because no -sp flag) +python -m run_full_model -t std -si -s all -r -d 20239999 -l 00N_000E -ce loss -ln "00N_000E test" + +Run: standard model; save intermediate outputs; run model from the beginning; run all model stages; +upload to folder with date 20239999; run 00N_000E; get carbon pools at time of loss; add a log note; +do not upload outputs to s3; use multiprocessing (implicit because no -sp flag) +python -m run_full_model -t std -si -s all -r -d 20239999 -l 00N_000E -ce loss -ln "00N_000E test" -nu + +Run: standard model; run model from the beginning; run all model stages; +upload to folder with date 20239999; run 00N_000E and 00N_010E; get carbon pools at time of loss; add a log note; +do not upload outputs to s3; use singleprocessing; +do not save intermediate outputs (implicit because no -si flag) +python -m run_full_model -t std -s all -r -nu -d 20239999 -l 00N_000E,00N_010E -ce loss -sp -ln "Two tile test" + +FULL STANDARD MODEL RUN: standard model; save intermediate outputs; run model from the beginning; run all model stages; +run all tiles; get carbon pools at time of loss; add a log note; +upload outputs to s3 (implicit because no -nu flag); use multiprocessing (implicit because no -sp flag) +python -m run_full_model -t std -si -s all -r -l all -ce loss -ln "Running all tiles" +""" import argparse -import os -import glob import datetime -import logging +import glob +import os + import constants_and_names as cn import universal_util as uu from data_prep.mp_model_extent import mp_model_extent @@ -36,25 +62,28 @@ from carbon_pools.mp_create_carbon_pools import mp_create_carbon_pools from emissions.mp_calculate_gross_emissions import mp_calculate_gross_emissions from analyses.mp_net_flux import mp_net_flux -from analyses.mp_aggregate_results_to_4_km import mp_aggregate_results_to_4_km -from analyses.mp_create_supplementary_outputs import mp_create_supplementary_outputs 
+from analyses.mp_derivative_outputs import mp_derivative_outputs def main (): + """ + Runs the entire forest GHG flux model or a subset of stages + :return: Sets of output tiles for the selected stages + """ - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # List of possible model stages to run (not including mangrove and planted forest stages) model_stages = ['all', 'model_extent', 'forest_age_category_IPCC', 'annual_removals_IPCC', 'annual_removals_all_forest_types', 'gain_year_count', 'gross_removals_all_forest_types', - 'carbon_pools', 'gross_emissions', - 'net_flux', 'aggregate', 'create_supplementary_outputs'] + 'carbon_pools', 'gross_emissions_biomass_soil', 'gross_emissions_soil_only', + 'net_flux', 'create_derivative_outputs'] # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run parser = argparse.ArgumentParser(description='Run the full carbon flux model') - parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help)) + parser.add_argument('--model-type', '-t', required=True, help=f'{cn.model_type_arg_help}') parser.add_argument('--stages', '-s', required=True, - help='Stages for running the flux model. Options are {}'.format(model_stages)) + help=f'Stages for running the flux model. Options are {model_stages}') parser.add_argument('--run-through', '-r', action='store_true', help='If activated, run named stage and all following stages. If not activated, run the selected stage only.') parser.add_argument('--run-date', '-d', required=False, @@ -62,11 +91,7 @@ def main (): parser.add_argument('--tile-id-list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--carbon-pool-extent', '-ce', required=False, - help='Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') - parser.add_argument('--emitted-pools-to-use', '-p', required=False, - help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.') - parser.add_argument('--tcd-threshold', '-tcd', required=False, default=cn.canopy_threshold, - help='Tree cover density threshold above which pixels will be included in the aggregation. 
Default is 30.') + help='Time period for which carbon pools should be calculated: loss, 2000, loss,2000, or 2000,loss') parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False, help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map') parser.add_argument('--mangroves', '-ma', action='store_true', @@ -75,115 +100,77 @@ def main (): help='Include US removal rate and standard deviation tile creation step (before model extent).') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') + parser.add_argument('--single-processor', '-sp', action='store_true', + help='Uses single processing rather than multiprocessing') parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') parser.add_argument('--log-note', '-ln', required=False, help='Note to include in log header about model run.') args = parser.parse_args() - sensit_type = args.model_type - stage_input = args.stages - run_through = args.run_through - run_date = args.run_date - tile_id_list = args.tile_id_list - carbon_pool_extent = args.carbon_pool_extent - emitted_pools = args.emitted_pools_to_use - thresh = args.tcd_threshold - if thresh is not None: - thresh = int(thresh) - std_net_flux = args.std_net_flux_aggreg - include_mangroves = args.mangroves - include_us = args.us_rates - no_upload = args.no_upload - save_intermediates = args.save_intermediates - log_note = args.log_note - + # Sets global variables to the command line arguments + cn.SENSIT_TYPE = args.model_type + cn.STAGE_INPUT = args.stages + cn.RUN_THROUGH = args.run_through + cn.RUN_DATE = args.run_date + cn.CARBON_POOL_EXTENT = args.carbon_pool_extent + cn.STD_NET_FLUX = args.std_net_flux_aggreg + cn.INCLUDE_MANGROVES = args.mangroves + cn.INCLUDE_US = args.us_rates + cn.NO_UPLOAD = args.no_upload + cn.SINGLE_PROCESSOR = args.single_processor + cn.SAVE_INTERMEDIATES = args.save_intermediates + cn.LOG_NOTE = args.log_note - # Start time for script - script_start = datetime.datetime.now() + tile_id_list = args.tile_id_list # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): uu.print_log("s3 credentials not found. Uploading to s3 disabled but downloading enabled.") - no_upload = True - + cn.NO_UPLOAD = True # Forces intermediate files to not be deleted if files can't be uploaded to s3. # Rationale is that if uploads to s3 are not occurring, intermediate files can't be downloaded during the model # run and therefore must exist locally. - if no_upload == True: - save_intermediates = True - + if cn.NO_UPLOAD: + cn.SAVE_INTERMEDIATES = True # Create the output log - uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates, - stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent, - emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux, - include_mangroves=include_mangroves, include_us=include_us, log_note=log_note) + uu.initiate_log(tile_id_list) + + # Checks whether the sensitivity analysis and tile_id_list arguments are valid + uu.check_sensit_type(cn.SENSIT_TYPE) + # Start time for script + script_start = datetime.datetime.now() # Checks the validity of the model stage arguments. If either one is invalid, the script ends. 
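As a rough illustration of how the stage arguments interact (this is not the actual uu.analysis_stages() implementation, which also handles the optional mangrove and US stages), selecting a stage with --run-through runs that stage and everything after it, while omitting the flag runs only the named stage:

def stages_to_run_sketch(model_stages, stage_input, run_through):
    # Hypothetical helper: pick which model stages to execute from the ordered stage list
    if stage_input == 'all':
        return model_stages[1:]              # every real stage after the 'all' placeholder
    idx = model_stages.index(stage_input)
    if run_through:
        return model_stages[idx:]            # named stage plus all following stages
    return [stage_input]                     # named stage only

# e.g. stages_to_run_sketch(model_stages, 'annual_removals_IPCC', run_through=False)
# -> ['annual_removals_IPCC']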
- if (stage_input not in model_stages): - uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', model_stages) + if cn.STAGE_INPUT not in model_stages: + uu.exception_log(f'Invalid stage selection. Please provide a stage from {model_stages}') else: pass # Generates the list of stages to run - actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, sensit_type, - include_mangroves = include_mangroves, include_us=include_us) - uu.print_log("Analysis stages to run:", actual_stages) + actual_stages = uu.analysis_stages(model_stages, cn.STAGE_INPUT, cn.RUN_THROUGH, cn.SENSIT_TYPE, + include_mangroves = cn.INCLUDE_MANGROVES, include_us=cn.INCLUDE_US) + uu.print_log(f'Analysis stages to run: {actual_stages}') # Reports how much storage is being used with files uu.check_storage() # Checks whether the sensitivity analysis argument is valid - uu.check_sensit_type(sensit_type) + uu.check_sensit_type(cn.SENSIT_TYPE) # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation. # Does this up front so the user knows before the run begins that information is missing. - if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): - uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") - - # Checks if the correct c++ script has been compiled for the pool option selected. - # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary. - if 'gross_emissions' in actual_stages: - - if emitted_pools == 'biomass_soil': - # Some sensitivity analyses have specific gross emissions scripts. - # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. - if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: - if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)): - uu.print_log("C++ for {} already compiled.".format(sensit_type)) - else: - uu.exception_log(no_upload, 'Must compile standard {} model C++...'.format(sensit_type)) - else: - if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") - else: - uu.exception_log(no_upload, 'Must compile generic emissions C++...') - - elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): - if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)): - uu.print_log("C++ for generic emissions already compiled.") - else: - uu.exception_log(no_upload, 'Must compile soil_only C++...') - - else: - uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid for gross emissions') - - # Checks whether the canopy cover argument is valid up front. - if 'aggregate' in actual_stages: - if thresh < 0 or thresh > 99: - uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') - else: - pass + if ('carbon_pools' in actual_stages) & (cn.CARBON_POOL_EXTENT not in ['loss', '2000', 'loss,2000', '2000,loss']): + uu.exception_log('Invalid carbon_pool_extent input. 
Please choose loss, 2000, loss,2000 or 2000,loss.') # If the tile_list argument is an s3 folder, the list of tiles in it is created if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))), "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed else: tile_id_list = uu.tile_id_list_check(tile_id_list) @@ -214,43 +201,42 @@ def main (): output_dir_list = [cn.annual_gain_AGC_BGC_natrl_forest_US_dir, cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir] + output_dir_list - # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year + # Adds the carbon directories depending on which carbon years are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: - if 'loss' in carbon_pool_extent: + if 'loss' in cn.CARBON_POOL_EXTENT: output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir] - if '2000' in carbon_pool_extent: + if '2000' in cn.CARBON_POOL_EXTENT: output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir] - # Adds the biomass_soil output directories or the soil_only output directories depending on the model run - if emitted_pools == 'biomass_soil': - output_dir_list = output_dir_list + [cn.gross_emis_commod_biomass_soil_dir, - cn.gross_emis_shifting_ag_biomass_soil_dir, - cn.gross_emis_forestry_biomass_soil_dir, - cn.gross_emis_wildfire_biomass_soil_dir, - cn.gross_emis_urban_biomass_soil_dir, - cn.gross_emis_no_driver_biomass_soil_dir, - cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, - cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, - cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, - cn.gross_emis_nodes_biomass_soil_dir] - - else: - output_dir_list = output_dir_list + [cn.gross_emis_commod_soil_only_dir, - cn.gross_emis_shifting_ag_soil_only_dir, - cn.gross_emis_forestry_soil_only_dir, - cn.gross_emis_wildfire_soil_only_dir, - cn.gross_emis_urban_soil_only_dir, - cn.gross_emis_no_driver_soil_only_dir, - cn.gross_emis_all_gases_all_drivers_soil_only_dir, - cn.gross_emis_co2_only_all_drivers_soil_only_dir, - cn.gross_emis_non_co2_all_drivers_soil_only_dir, - cn.gross_emis_nodes_soil_only_dir] - + # Adds the biomass_soil output directories and the soil_only output directories + output_dir_list = output_dir_list + [cn.gross_emis_commod_biomass_soil_dir, + cn.gross_emis_shifting_ag_biomass_soil_dir, + cn.gross_emis_forestry_biomass_soil_dir, + cn.gross_emis_wildfire_biomass_soil_dir, + cn.gross_emis_urban_biomass_soil_dir, + cn.gross_emis_no_driver_biomass_soil_dir, + cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, + cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, + cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, + cn.gross_emis_nodes_biomass_soil_dir] + + output_dir_list = output_dir_list + [cn.gross_emis_commod_soil_only_dir, + cn.gross_emis_shifting_ag_soil_only_dir, + cn.gross_emis_forestry_soil_only_dir, + cn.gross_emis_wildfire_soil_only_dir, + cn.gross_emis_urban_soil_only_dir, + cn.gross_emis_no_driver_soil_only_dir, + 
cn.gross_emis_all_gases_all_drivers_soil_only_dir, + cn.gross_emis_co2_only_all_drivers_soil_only_dir, + cn.gross_emis_non_co2_all_drivers_soil_only_dir, + cn.gross_emis_nodes_soil_only_dir] + + # Adds the net flux output directory output_dir_list = output_dir_list + [cn.net_flux_dir] # Supplementary outputs @@ -270,365 +256,363 @@ def main (): # removal function if 'annual_removals_mangrove' in actual_stages: - uu.print_log(":::::Creating tiles of annual removals for mangrove") + uu.print_log(':::::Creating tiles of annual removals for mangrove') start = datetime.datetime.now() - mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = run_date) + mp_annual_gain_rate_mangrove(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_mangrove: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGC+BGC removals rate and AGC stdev for US-specific removals using the standard model # removal function if 'annual_removals_us' in actual_stages: - uu.print_log(":::::Creating tiles of annual removals for US") + uu.print_log(':::::Creating tiles of annual removals for US') start = datetime.datetime.now() - mp_US_removal_rates(sensit_type, tile_id_list, run_date = run_date) + mp_US_removal_rates(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_us: {elapsed_time}', "\n", "\n") # Creates model extent tiles if 'model_extent' in actual_stages: - uu.print_log(":::::Creating tiles of model extent") + uu.print_log(':::::Creating tiles of model extent') start = datetime.datetime.now() - mp_model_extent(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_model_extent(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for model_extent: {elapsed_time}', "\n", "\n") # Creates age category tiles for natural forests if 'forest_age_category_IPCC' in actual_stages: - uu.print_log(":::::Creating tiles of forest age categories for IPCC removal rates") + uu.print_log(':::::Creating tiles of forest age categories for IPCC removal rates') start = datetime.datetime.now() - mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_forest_age_category_IPCC(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for forest_age_category_IPCC: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGB and BGB removals rates using IPCC Table 4.9 defaults if 'annual_removals_IPCC' in actual_stages: - uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults") + uu.print_log(':::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults') start = datetime.datetime.now() - mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_IPCC_defaults(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - 
uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_IPCC: {elapsed_time}', "\n", "\n") # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types if 'annual_removals_all_forest_types' in actual_stages: - uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates for all forest types") + uu.print_log(':::::Creating tiles of annual aboveground and belowground removal rates for all forest types') start = datetime.datetime.now() - mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_annual_gain_rate_AGC_BGC_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types: {elapsed_time}', "\n", "\n") # Creates tiles of the number of years of removals for all model pixels (across all forest types) if 'gain_year_count' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for gain year count creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for gain year count creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) - 
tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_mangrove_biomass_2000}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_natrl_forest_US}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_natrl_forest_young}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_age_cat_IPCC}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_IPCC_defaults}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_IPCC_defaults}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_ifl_primary}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_planted_forest_type_unmasked}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_plant_pre_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGB_mangrove}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_natrl_forest_young}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_stdev_annual_gain_AGC_all_types}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating tiles of gain year count for all removal pixels") + uu.print_log(':::::Creating tiles of gain year count for all removal pixels') start = datetime.datetime.now() - mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = run_date, no_upload=no_upload) + mp_gain_year_count_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for gain_year_count: {elapsed_time}', "\n", "\n") # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground) if 'gross_removals_all_forest_types' in actual_stages: - uu.print_log(":::::Creating gross removals for all forest types combined (above + belowground) tiles") + uu.print_log(':::::Creating gross removals for all forest types combined (above + belowground) tiles') start = datetime.datetime.now() - mp_gross_removals_all_forest_types(sensit_type, tile_id_list, 
run_date=run_date, no_upload=no_upload) + mp_gross_removals_all_forest_types(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for gross_removals_all_forest_types: {elapsed_time}', "\n", "\n") - # Creates carbon emitted_pools in loss year + # Creates carbon pools in loss year if 'carbon_pools' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for carbon pool creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for carbon pool creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_model_extent))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_model_extent}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_age_cat_IPCC}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGB_IPCC_defaults}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_BGC_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_BGC_all_types}*tif')) tiles_to_delete.extend(glob.glob('*growth_years*tif')) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gain_year_count}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_BGCO2_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_BGCO2_all_types}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_ifl_primary}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_planted_forest_type_unmasked}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating carbon pool tiles") + uu.print_log(':::::Creating carbon pool tiles') start = datetime.datetime.now() - mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date, no_upload=no_upload, - save_intermediates=save_intermediates) + mp_create_carbon_pools(tile_id_list, cn.CARBON_POOL_EXTENT) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n") + 
uu.print_log(f':::::Processing time for create_carbon_pools: {elapsed_time}', "\n", "\n") - # Creates gross emissions tiles by driver, gas, and all emissions combined - if 'gross_emissions' in actual_stages: + # Creates gross emissions tiles for biomass+soil by driver, gas, and all emissions combined + if 'gross_emissions_biomass_soil' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for gross emissions creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for biomass_soil gross emissions creation by deleting unneeded tiles') tiles_to_delete = [] - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_AGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_BGC_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_deadwood_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_litter_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_total_C_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_elevation}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_precip}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_annual_gain_AGC_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cumul_gain_AGCO2_all_types}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_cont_eco_processed}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_WHRC_biomass_2000_unmasked}*tif')) + # tiles_to_delete.extend(glob.glob(f'*{cn.pattern_mangrove_biomass_2000}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_removal_forest_type}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating gross emissions tiles") + uu.print_log(':::::Creating gross biomass_soil emissions tiles') start = datetime.datetime.now() - mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date, no_upload=no_upload) + mp_calculate_gross_emissions(tile_id_list, 'biomass_soil') end = datetime.datetime.now() 
elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for biomass_soil gross_emissions: {elapsed_time}', "\n", "\n") - # Creates net flux tiles (gross emissions - gross removals) - if 'net_flux' in actual_stages: + # Creates gross emissions tiles for soil only by driver, gas, and all emissions combined + if 'gross_emissions_soil_only' in actual_stages: - if not save_intermediates: + if not cn.SAVE_INTERMEDIATES: - uu.print_log(":::::Freeing up memory for net flux creation by deleting unneeded tiles") + uu.print_log(':::::Freeing up memory for soil_only gross emissions creation by deleting unneeded tiles') tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_commod_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_shifting_ag_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_forestry_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_wildfire_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_urban_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_no_driver_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_nodes_biomass_soil))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_peat_mask))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_climate_zone))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_burn_year))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_commod_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_shifting_ag_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_forestry_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_wildfire_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_urban_biomass_soil}*tif')) + 
tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_no_driver_biomass_soil}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_nodes_biomass_soil}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') + + uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + uu.print_log(':::::Deleted unneeded tiles') uu.check_storage() - uu.print_log(":::::Creating net flux tiles") + uu.print_log(':::::Creating soil_only gross emissions tiles') start = datetime.datetime.now() - mp_net_flux(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_calculate_gross_emissions(tile_id_list, 'soil_only') end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for soil_only gross_emissions: {elapsed_time}', "\n", "\n") - # Aggregates gross emissions, gross removals, and net flux to coarser resolution. - # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux. - if 'aggregate' in actual_stages: + # Creates net flux tiles (gross emissions - gross removals) + if 'net_flux' in actual_stages: - # aux.xml files need to be deleted because otherwise they'll be included in the aggregation iteration. - # They are created by using check_and_delete_if_empty_light() - uu.print_log(":::::Deleting any aux.xml files") - tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*aux.xml')) + if not cn.SAVE_INTERMEDIATES: - for tile_to_delete in tiles_to_delete: - os.remove(tile_to_delete) - uu.print_log(":::::Deleted {0} aux.xml files: {1}".format(len(tiles_to_delete), tiles_to_delete), "\n") + uu.print_log(':::::Freeing up memory for net flux creation by deleting unneeded tiles') + tiles_to_delete = [] + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_all_gases_all_drivers_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_non_co2_all_drivers_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_co2_only_all_drivers_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_commod_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_shifting_ag_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_forestry_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_wildfire_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_urban_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_no_driver_soil_only}*tif')) + tiles_to_delete.extend(glob.glob(f'*{cn.pattern_gross_emis_nodes_soil_only}*tif')) + uu.print_log(f' Deleting {len(tiles_to_delete)} tiles...') + + for tile_to_delete in tiles_to_delete: + os.remove(tile_to_delete) + uu.print_log(':::::Deleted unneeded tiles') + uu.check_storage() - uu.print_log(":::::Creating 4x4 km aggregate maps") + uu.print_log(':::::Creating net flux tiles') start = datetime.datetime.now() - mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux, - run_date=run_date, no_upload=no_upload) + mp_net_flux(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for net_flux: 
{elapsed_time}', "\n", "\n") - # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters - if 'create_supplementary_outputs' in actual_stages: + # Creates derivative outputs for gross emissions, gross removals, and net flux. + # Creates forest extent and per-pixel tiles at original (0.00025x0.00025 deg) resolution and + # creates aggregated global maps at 0.04x0.04 deg resolution. + # For sensitivity analyses, also creates percent difference and sign change maps compared to standard model net flux. + if 'create_derivative_outputs' in actual_stages: - if not save_intermediates: - - uu.print_log(":::::Deleting rewindowed tiles") - tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*rewindow*tif')) - uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") + # aux.xml files need to be deleted because otherwise they'll be included in the aggregation iteration. + # They are created by using check_and_delete_if_empty_light() + uu.print_log(':::::Deleting any aux.xml files') + tiles_to_delete = [] + tiles_to_delete.extend(glob.glob('*aux.xml')) - for tile_to_delete in tiles_to_delete: - os.remove(tile_to_delete) - uu.print_log(":::::Deleted unneeded tiles") + for tile_to_delete in tiles_to_delete: + os.remove(tile_to_delete) + uu.print_log(f':::::Deleted {len(tiles_to_delete)} aux.xml files: {tiles_to_delete}', "\n") - uu.check_storage() - uu.print_log(":::::Creating supplementary versions of main model outputs (forest extent, per pixel)") + uu.print_log(':::::Creating derivative outputs: forest extent/per-pixel tiles and aggregate maps') start = datetime.datetime.now() - mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) + mp_derivative_outputs(tile_id_list) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() - uu.print_log(":::::Processing time for supplementary output raster creation:", elapsed_time, "\n", "\n") + uu.print_log(f':::::Processing time for creating derivative outputs: {elapsed_time}', "\n", "\n") # If no_upload flag is activated, tiles on s3 aren't counted - if not no_upload: + if not cn.NO_UPLOAD: - uu.print_log(":::::Counting tiles output to each folder") + uu.print_log(':::::Counting tiles output to each folder') # Modifies output directory names to make them match those used during the model run. # The tiles in each of these directories and counted and logged. # If the model run isn't the standard one, the output directory and file names are changed - if sensit_type != 'std': - uu.print_log("Modifying output directory and file name pattern based on sensitivity analysis") - output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) + if cn.SENSIT_TYPE != 'std': + uu.print_log('Modifying output directory and file name pattern based on sensitivity analysis') + output_dir_list = uu.alter_dirs(cn.SENSIT_TYPE, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. # Only done if output upload is enabled. 
- if run_date is not None and no_upload is not None: - output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) + if cn.RUN_DATE is not None and cn.NO_UPLOAD is not None: + output_dir_list = uu.replace_output_dir_date(output_dir_list, cn.RUN_DATE) for output in output_dir_list: tile_count = uu.count_tiles_s3(output) - uu.print_log("Total tiles in", output, ": ", tile_count) + uu.print_log(f'Total tiles in {output}: {tile_count}') script_end = datetime.datetime.now() script_elapsed_time = script_end - script_start - uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n") + uu.print_log(f':::::Processing time for entire run: {script_elapsed_time}', "\n") # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: uu.upload_log() + if __name__ == '__main__': main() diff --git a/sensitivity_analysis/US_removal_rates.py b/sensitivity_analysis/US_removal_rates.py index 2b004476..a11750fa 100644 --- a/sensitivity_analysis/US_removal_rates.py +++ b/sensitivity_analysis/US_removal_rates.py @@ -53,11 +53,12 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g start = datetime.datetime.now() # Names of the input tiles - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' annual_gain_standard = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_AGB_IPCC_defaults) # Used as the template extent/default for the US US_age_cat = '{0}_{1}.tif'.format(tile_id, cn.pattern_US_forest_age_cat_processed) US_forest_group = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_forest_group_processed) US_region = '{0}_{1}.tif'.format(tile_id, cn.pattern_FIA_regions_processed) + BGB_AGB_ratio = uu.sensit_tile_rename(cn.SENSIT_TYPE, tile_id, cn.pattern_BGB_AGB_ratio) # Opens standard model removals rate tile with rasterio.open(annual_gain_standard) as annual_gain_standard_src: @@ -74,6 +75,12 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g US_forest_group_src = rasterio.open(US_forest_group) US_region_src = rasterio.open(US_region) + try: + BGB_AGB_ratio_src = rasterio.open(BGB_AGB_ratio) + uu.print_log(f' BGB:AGB tile found for {tile_id}') + except rasterio.errors.RasterioIOError: + uu.print_log(f' BGB:AGB tile not found for {tile_id}. 
Using default BGB:AGB from Mokany instead.') + # Updates kwargs for the output dataset kwargs.update( driver='GTiff', @@ -96,6 +103,12 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g US_forest_group_window = US_forest_group_src.read(1, window=window) US_region_window = US_region_src.read(1, window=window) + try: + BGB_AGB_ratio_window = BGB_AGB_ratio_src.read(1, window=window) + except UnboundLocalError: + BGB_AGB_ratio_window = np.empty((window.height, window.width), dtype='float32') + BGB_AGB_ratio_window[:] = cn.below_to_above_non_mang + # Masks the three input tiles (age category, forest group, FIA region) to the pixels to the standard removals model extent age_cat_masked_window = np.ma.masked_where(annual_gain_standard_window == 0, US_age_cat_window).filled(0).astype('uint16') US_forest_group_masked_window = np.ma.masked_where(annual_gain_standard_window == 0, US_forest_group_window).filled(0).astype('uint16') @@ -138,7 +151,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g agb_dst_corrected_window = np.where(agb_dst_window > (max_rate*1.2), annual_gain_standard_window, agb_dst_window) # Calculates BGB removal rate from AGB removal rate - bgb_dst_window = agb_dst_corrected_window * cn.below_to_above_non_mang + bgb_dst_window = agb_dst_corrected_window * BGB_AGB_ratio_window # Writes the output windows to the outputs agb_dst.write_band(1, agb_dst_corrected_window, window=window) diff --git a/sensitivity_analysis/legal_AMZ_loss.py b/sensitivity_analysis/legal_AMZ_loss.py index 2a3d6d87..da20158c 100644 --- a/sensitivity_analysis/legal_AMZ_loss.py +++ b/sensitivity_analysis/legal_AMZ_loss.py @@ -14,7 +14,7 @@ def legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern): start = datetime.datetime.now() loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' extent = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_WHRC_biomass_2000_non_mang_non_planted) plantations = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_planted_forest_type_unmasked) @@ -39,13 +39,13 @@ def legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern): plantations_src = rasterio.open(plantations) uu.print_log(" Planted forest tile found for {}".format(tile_id)) except: - uu.print_log(" No planted forest tile for {}".format(tile_id)) + uu.print_log(" Planted forest tile not found for {}".format(tile_id)) try: mangroves_src = rasterio.open(mangroves) uu.print_log(" Mangrove tile found for {}".format(tile_id)) except: - uu.print_log(" No mangrove tile for {}".format(tile_id)) + uu.print_log(" Mangrove tile not found for {}".format(tile_id)) # Updates kwargs for the output dataset kwargs.update( @@ -98,7 +98,7 @@ def tile_names(tile_id, sensit_type): # Names of the input files loss = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_annual_loss_processed) - gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id) + gain = f'{tile_id}_{cn.pattern_gain_ec2}.tif' extent = '{0}_{1}.tif'.format(tile_id, cn.pattern_Brazil_forest_extent_2000_processed) biomass = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_WHRC_biomass_2000_non_mang_non_planted) @@ -108,7 +108,7 @@ def tile_names(tile_id, sensit_type): # Creates gain year count tiles for pixels that only had loss def 
legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): - uu.print_log("Gain year count for loss only pixels:", tile_id) + uu.print_log("Gain year count for loss-only pixels:", tile_id) # Names of the input tiles loss, gain, extent, biomass = tile_names(tile_id, sensit_type) @@ -116,9 +116,9 @@ def legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): # start time start = datetime.datetime.now() - # Pixels with loss only, in PRODES forest 2000 + # Pixels with loss-only, in PRODES forest 2000 loss_calc = '--calc=(A>0)*(B==0)*(C==1)*(A-1)' - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) + loss_outfilename = '{}_gain_year_count_loss_only.tif'.format(tile_id) loss_outfilearg = '--outfile={}'.format(loss_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -128,13 +128,13 @@ def legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_only') # Creates gain year count tiles for pixels that had no loss. It doesn't matter if there was gain in these pixels because # gain without loss in PRODES extent is being ignored for this analysis (as in, there can't be canopy gain in PRODES # extent without loss because it's already dense primary forest). -# Making the condition for "no change" be "no loss" covers the rest of the loss-gain space, since loss-only and +# Making the condition for "no-change" be "no loss" covers the rest of the loss-gain space, since loss-only and # loss-and-gain covers the loss pixel side of things. 
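# As a concrete reading of the --calc expression in the function below (band letters follow the gdal_calc call:
# A = loss year, B = PRODES forest extent 2000, C = biomass): a pixel with no loss (A==0) that is inside PRODES
# forest 2000 (B==1) and has biomass (C>0) is assigned the full cn.loss_years of gain years, while any pixel with
# loss (A>0) gets 0 here and is covered by the loss-only and loss-and-gain rasters instead.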
def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): @@ -153,7 +153,7 @@ def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): # Pixels with no loss but in areas with PRODES forest 2000 and biomass >0 (same as standard model) no_change_calc = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years) - no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) + no_change_outfilename = '{}_gain_year_count_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass, no_change_calc, no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -163,13 +163,13 @@ def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_no_change') -# Creates gain year count tiles for pixels that had both loss and gain +# Creates gain year count tiles for pixels that had both loss-and-gain def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type): - uu.print_log("Gain year count for loss and gain pixels:", tile_id) + uu.print_log("Gain year count for loss-and-gain pixels:", tile_id) # start time start = datetime.datetime.now() @@ -177,9 +177,9 @@ def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_t # Names of the loss, gain and tree cover density tiles loss, gain, extent, biomass = tile_names(tile_id, sensit_type) - # Pixels with both loss and gain, and in PRODES forest 2000 + # Pixels with both loss-and-gain, and in PRODES forest 2000 loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(cn.loss_years) - loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) + loss_and_gain_outfilename = f'{tile_id}_gain_year_count_loss_and_gain.tif' loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] @@ -189,21 +189,21 @@ def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_t uu.log_subprocess_output(process.stdout) # Prints information about the tile that was just processed - uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain') + uu.end_of_fx_summary(start, tile_id, 'gain_year_count_loss_and_gain') # Merges the four gain year count tiles above to create a single gain year count tile def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern): - uu.print_log("Merging loss, gain, no change, and loss/gain pixels into single raster for {}".format(tile_id)) + uu.print_log("Merging loss, gain, no-change, and loss/gain pixels into single raster for {}".format(tile_id)) # start time start = datetime.datetime.now() # The four rasters from above that are to be merged - loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) - no_change_outfilename = 
'{}_gain_year_count_no_change.tif'.format(tile_id) + loss_and_gain_outfilename = '{}_gain_year_count_loss_and_gain.tif'.format(tile_id) # All four components are merged together to the final output raster age_outfile = '{}_{}.tif'.format(tile_id, output_pattern) diff --git a/sensitivity_analysis/mp_Mekong_loss.py b/sensitivity_analysis/mp_Mekong_loss.py index c282ac82..5907e288 100644 --- a/sensitivity_analysis/mp_Mekong_loss.py +++ b/sensitivity_analysis/mp_Mekong_loss.py @@ -20,17 +20,17 @@ def main (): # Create the output log uu.initiate_log() - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist. tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir) # tile_id_list = ['50N_130W'] # test tiles uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads the Mekong loss folder. Each year of loss has its own raster - uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir, sensit_type) + uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_tile_dir, sensit_type) # The list of all annual loss rasters annual_loss_list = glob.glob('Loss_20*tif') @@ -60,7 +60,8 @@ def main (): source_raster = loss_composite out_pattern = cn.pattern_Mekong_loss_processed dt = 'Byte' - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # This is necessary for changing NoData values to 0s (so they are recognized as 0s) pool.map(Mekong_loss.recode_tiles, tile_id_list) diff --git a/sensitivity_analysis/mp_Saatchi_biomass_prep.py b/sensitivity_analysis/mp_Saatchi_biomass_prep.py index fe7b49ae..4d373e74 100644 --- a/sensitivity_analysis/mp_Saatchi_biomass_prep.py +++ b/sensitivity_analysis/mp_Saatchi_biomass_prep.py @@ -20,14 +20,14 @@ def main (): # Create the output log uu.initiate_log() - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # The list of tiles to iterate through tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir) # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles # tile_id_list = ['00N_110E'] # test tile uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB) sensit_type = 'biomass_swap' @@ -40,7 +40,8 @@ def main (): out_pattern = cn.pattern_JPL_unmasked_processed dt = 'Float32' pool = multiprocessing.Pool(cn.count-5) # count-5 peaks at 320GB of memory - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # Checks if each tile has data in it. Only tiles with data are uploaded. 
upload_dir = cn.JPL_processed_dir diff --git a/sensitivity_analysis/mp_US_removal_rates.py b/sensitivity_analysis/mp_US_removal_rates.py index 6a547a0c..b2e03553 100644 --- a/sensitivity_analysis/mp_US_removal_rates.py +++ b/sensitivity_analysis/mp_US_removal_rates.py @@ -45,11 +45,12 @@ def main (): # Create the output log uu.initiate_log() - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) # Files to download for this script. - download_dict = {cn.gain_dir: [cn.pattern_gain], - cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults] + download_dict = {cn.gain_dir: [cn.pattern_gain_data_lake], + cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], + cn.BGB_AGB_ratio_dir: [cn.pattern_BGB_AGB_ratio] } # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist. @@ -72,11 +73,11 @@ def main (): # Only creates FIA region tiles if they don't already exist on s3. if FIA_regions_tile_count == 16: uu.print_log("FIA region tiles already created. Copying to s3 now...") - uu.s3_flexible_download(cn.FIA_regions_processed_dir, cn.pattern_FIA_regions_processed, cn.docker_base_dir, 'std', 'all') + uu.s3_flexible_download(cn.FIA_regions_processed_dir, cn.pattern_FIA_regions_processed, cn.docker_tile_dir, 'std', 'all') else: uu.print_log("FIA region tiles do not exist. Creating tiles, then copying to s3 for future use...") - uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, 'std') + uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_tile_dir, 'std') cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging @@ -90,7 +91,7 @@ def main (): # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script. - US_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed)) + US_tile_list = uu.tile_list_spot_machine(cn.docker_tile_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed)) US_tile_id_list = [i[0:8] for i in US_tile_list] # US_tile_id_list = ['50N_130W'] # For testing uu.print_log(US_tile_id_list) @@ -108,15 +109,15 @@ def main (): else: uu.print_log("Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use...") - uu.s3_file_download(os.path.join(cn.US_forest_age_cat_raw_dir, cn.name_US_forest_age_cat_raw), cn.docker_base_dir, 'std') + uu.s3_file_download(os.path.join(cn.US_forest_age_cat_raw_dir, cn.name_US_forest_age_cat_raw), cn.docker_tile_dir, 'std') # Converts the national forest age category raster to Hansen tiles source_raster = cn.name_US_forest_age_cat_raw out_pattern = cn.pattern_US_forest_age_cat_processed dt = 'Int16' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), US_tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + US_tile_id_list) uu.upload_final_set(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed) @@ -131,15 +132,15 @@ def main (): else: uu.print_log("FIA forest group tiles do not exist. 
Creating tiles, then copying to s3 for future use...") - uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, 'std') + uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_tile_dir, 'std') # Converts the national forest group raster to Hansen forest group tiles source_raster = cn.name_FIA_forest_group_raw out_pattern = cn.pattern_FIA_forest_group_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), US_tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + US_tile_id_list) uu.upload_final_set(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed) @@ -148,13 +149,13 @@ def main (): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, US_tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, US_tile_id_list) # Table with US-specific removal rates # cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir, '--no-sign-request'] - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_tile_dir] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) diff --git a/sensitivity_analysis/mp_legal_AMZ_loss.py b/sensitivity_analysis/mp_legal_AMZ_loss.py index fc7a43de..0c773bc3 100644 --- a/sensitivity_analysis/mp_legal_AMZ_loss.py +++ b/sensitivity_analysis/mp_legal_AMZ_loss.py @@ -28,7 +28,7 @@ def main (): # Create the output log uu.initiate_log() - os.chdir(cn.docker_base_dir) + os.chdir(cn.docker_tile_dir) Brazil_stages = ['all', 'create_forest_extent', 'create_loss'] @@ -46,11 +46,11 @@ def main (): # Checks the validity of the two arguments. If either one is invalid, the script ends. if (stage_input not in Brazil_stages): - uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages) + uu.exception_log('Invalid stage selection. Please provide a stage from', Brazil_stages) else: pass if (run_through not in ['true', 'false']): - uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.') + uu.exception_log('Invalid run through option. 
Please enter true or false.') else: pass @@ -78,10 +78,10 @@ def main (): # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles # tile_id_list = ['50N_130W'] # test tiles uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input rasters and lists them - uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type) + uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_tile_dir, sensit_type) raw_forest_extent_inputs = glob.glob('*_AMZ_warped_*tif') # The list of tiles to merge # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that. @@ -109,8 +109,8 @@ def main (): out_pattern = cn.pattern_Brazil_forest_extent_2000_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) # Checks if each tile has data in it. Only tiles with data are uploaded. upload_dir = master_output_dir_list[0] @@ -126,10 +126,10 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input rasters and lists them - cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive'] + cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.'] uu.log_subprocess_output_full(cmd) uu.print_log("Input loss rasters downloaded. Getting resolution of recent raster...") @@ -163,8 +163,8 @@ def main (): out_pattern = cn.pattern_Brazil_annual_loss_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count/2)) - pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, - no_upload=no_upload), tile_id_list) + pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), + tile_id_list) uu.print_log(" PRODES composite loss raster warped to Hansen tiles") # Checks if each tile has data in it. Only tiles with data are uploaded. @@ -182,7 +182,7 @@ def main (): # Files to download for this script. 
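# (Each key in download_dict is an s3 directory and each value is a one-item list holding that directory's file
# pattern; the download loop further down reads them as dir = key and pattern = values[0].)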
download_dict = {cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], @@ -193,19 +193,19 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list) @@ -239,7 +239,7 @@ def main (): # Files to download for this script. download_dict = { cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], @@ -250,19 +250,19 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list) @@ -296,10 +296,10 @@ def main (): # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern) # Intermediate output tiles for checking outputs - uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only") - uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only") - 
uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change") - uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_loss_only") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_gain_only") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_no_change") + uu.upload_final_set(stage_output_dir_list[3], "gain_year_count_loss_and_gain") # Uploads output from this stage uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3]) @@ -322,13 +322,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (annual removals). if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[4:6]) @@ -337,11 +337,11 @@ def main (): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # Table with IPCC Table 4.9 default removals rates - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -438,13 +438,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). 
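# For example, for a run with sensit_type='biomass_swap', only master_output_dir_list[6:8] and
# master_output_pattern_list[6:8] (the cumulative removals outputs) are passed through alter_dirs/alter_patterns
# below; the other outputs keep their standard directories and patterns.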
if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[6:8]) @@ -453,7 +453,7 @@ def main (): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # Calculates cumulative aboveground carbon removals in non-mangrove planted forests @@ -510,13 +510,13 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10]) stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[8:10]) @@ -525,7 +525,7 @@ def main (): for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # For multiprocessing @@ -563,7 +563,7 @@ def main (): cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], - cn.gain_dir: [cn.pattern_gain], + cn.gain_dir: [cn.pattern_gain_data_lake], cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove], cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove], cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest], @@ -588,22 +588,22 @@ def main (): tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) - uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") + uu.print_log(f'There are {str(len(tile_id_list))} tiles to process', "\n") for key, values in download_dict.items(): dir = key pattern = values[0] - uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) + uu.s3_flexible_download(dir, pattern, cn.docker_tile_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': - uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") + uu.print_log('Changing output directory and file name pattern based on sensitivity analysis') stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[10:16]) stage_output_pattern_list = uu.alter_patterns(sensit_type, 
master_output_pattern_list[10:16]) # Table with IPCC Wetland Supplement Table 4.4 default mangrove removals rates - cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] + cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_tile_dir] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -675,7 +675,7 @@ def main (): uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0]) else: - uu.exception_log(no_upload, "Extent argument not valid") + uu.exception_log("Extent argument not valid") uu.print_log("Creating tiles of belowground carbon") # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine @@ -749,7 +749,7 @@ def main (): uu.print_log("Skipping soil for 2000 carbon pool calculation") else: - uu.exception_log(no_upload, "Extent argument not valid") + uu.exception_log("Extent argument not valid") uu.print_log("Creating tiles of total carbon") # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory diff --git a/__init__.py b/test/__init__.py similarity index 100% rename from __init__.py rename to test/__init__.py diff --git a/burn_date/__init__.py b/test/carbon_pools/__init__.py similarity index 100% rename from burn_date/__init__.py rename to test/carbon_pools/__init__.py diff --git a/test/carbon_pools/conftest.py b/test/carbon_pools/conftest.py new file mode 100644 index 00000000..6aa12941 --- /dev/null +++ b/test/carbon_pools/conftest.py @@ -0,0 +1,48 @@ +import glob +import numpy as np +import os +import pytest +import rasterio +import constants_and_names as cn +from carbon_pools.create_carbon_pools import prepare_gain_table, mangrove_pool_ratio_dict + +# Makes mangrove BGC:AGC dictionary for different continent-ecozone combinations +@pytest.fixture(scope='session') +def create_BGC_dictionary(): + + gain_table_simplified = prepare_gain_table() + + mang_BGB_AGB_ratio = mangrove_pool_ratio_dict(gain_table_simplified, + cn.below_to_above_trop_dry_mang, + cn.below_to_above_trop_wet_mang, + cn.below_to_above_subtrop_mang) + + return mang_BGB_AGB_ratio + + +# Makes mangrove deadwood:AGC dictionary for different continent-ecozone combinations +@pytest.fixture(scope='session') +def create_deadwood_dictionary(): + + gain_table_simplified = prepare_gain_table() + + mang_deadwood_AGB_ratio = mangrove_pool_ratio_dict(gain_table_simplified, + cn.deadwood_to_above_trop_dry_mang, + cn.deadwood_to_above_trop_wet_mang, + cn.deadwood_to_above_subtrop_mang) + + return mang_deadwood_AGB_ratio + + +# Makes mangrove litter:AGC dictionary for different continent-ecozone combinations +@pytest.fixture(scope='session') +def create_litter_dictionary(): + + gain_table_simplified = prepare_gain_table() + + mang_litter_AGB_ratio = mangrove_pool_ratio_dict(gain_table_simplified, + cn.litter_to_above_trop_dry_mang, + cn.litter_to_above_trop_wet_mang, + cn.litter_to_above_subtrop_mang) + + return mang_litter_AGB_ratio diff --git a/test/carbon_pools/test_BGC_rasterio.py b/test/carbon_pools/test_BGC_rasterio.py new file mode 100644 index 00000000..45389be7 --- /dev/null +++ b/test/carbon_pools/test_BGC_rasterio.py @@ -0,0 +1,96 @@ +import cProfile +import glob +import numpy as np +import os +import pytest +import rasterio +import sys +import universal_util as uu +import 
constants_and_names as cn +from unittest.mock import patch +from carbon_pools.create_carbon_pools import create_BGC + +import test.test_utilities as tu + + +# run from /usr/local/app +# pytest -m BGC -s +# Good test coordinates in GIS are -0.0002 S, 9.549 E (has two mangrove loss pixels adjacent to a few non-mangrove loss pixels) + +# @pytest.mark.xfail +@patch("universal_util.sensit_tile_rename") +@patch("universal_util.sensit_tile_rename_biomass") +@patch("universal_util.make_tile_name") +@patch("universal_util.upload_log") +@pytest.mark.rasterio +@pytest.mark.BGC +@pytest.mark.parametrize("comparison_dict", [{cn.BGC_emis_year_dir: cn.pattern_BGC_emis_year}]) + +def test_rasterio_runs(upload_log_dummy, make_tile_name_fake, sensit_tile_rename_biomass_fake, sensit_tile_rename_fake, + delete_old_outputs, create_BGC_dictionary, comparison_dict): + + ### arrange + # tile_id for testing and the extent that should be tested within it + tile_id = "00N_000E" + xmin = 0 + ymin = -0.005 + xmax = 10 + ymax = 0 + + # Dictionary of tiles needed for test + input_dict = {cn.cont_eco_dir: cn.pattern_cont_eco_processed, + cn.AGC_emis_year_dir: cn.pattern_AGC_emis_year, + cn.BGB_AGB_ratio_dir: cn.pattern_BGB_AGB_ratio, + cn.removal_forest_type_dir: cn.pattern_removal_forest_type} + + # Makes input tiles for process being tested in specified test area + tu.make_test_tiles(tile_id, input_dict, cn.pattern_test_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + test_input_pattern = list(comparison_dict.values())[0] + + # Makes comparison tiles for output in specified test area + tu.make_test_tiles(tile_id, comparison_dict, cn.pattern_comparison_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + # Deletes outputs of previous run if they exist. + # Only runs before first parametrized run to avoid deleting the difference raster created from previous parametrizations + print(delete_old_outputs) + + # Makes mangrove BGC:AGC dictionary for different continent-ecozone combinations + BGC_dict = create_BGC_dictionary + + # Renames the input test tiles with the test suffix (except for biomass, which has its own rule) + def fake_impl_sensit_tile_rename(sensit_type, tile_id, raw_pattern): + return f"test/test_data/{tile_id}_{raw_pattern}_{cn.pattern_test_suffix}.tif" + sensit_tile_rename_fake.side_effect = fake_impl_sensit_tile_rename + + # Renames the input biomass tile with the test suffix + def fake_impl_sensit_tile_rename_biomass(sensit_type, tile_id): + return f"test/test_data/{tile_id}_t_aboveground_biomass_ha_2000_{cn.pattern_test_suffix}.tif" + sensit_tile_rename_biomass_fake.side_effect = fake_impl_sensit_tile_rename_biomass + + # Makes the output tile names with the test suffix + def fake_impl_make_tile_name(tile_id, out_pattern): + return f"test/test_data/tmp_out/{tile_id}_{out_pattern}_{cn.pattern_test_suffix}.tif" + make_tile_name_fake.side_effect = fake_impl_make_tile_name + + ### act + # Creates the fragment output tiles + create_BGC(tile_id=tile_id, + mang_BGB_AGB_ratio=BGC_dict, + carbon_pool_extent=['loss']) + + + ### assert + # The original and new rasters that need to be compared + original_raster = f'{cn.test_data_dir}{tile_id}_{test_input_pattern}_{cn.pattern_comparison_suffix}.tif' + # original_raster = f'{cn.test_data_dir}{tile_id}_{cn.pattern_deadwood_emis_year_2000}_{cn.pattern_comparison_suffix}.tif' # For forcing failure of litter test (compares litter to deadwood) + new_raster = f'{cn.test_data_out_dir}{tile_id}_{test_input_pattern}_{cn.pattern_test_suffix}.tif' + # new_raster = 
f'{cn.test_data_out_dir}{tile_id}_{cn.pattern_litter_emis_year_2000}_{cn.pattern_test_suffix}.tif' # For forcing failure of deadwood test (compares deadwood to litter) + + # # Converts the original and new rasters into numpy arrays for comparison. + # # Also creates a difference raster for visualization (not used in testing). + # # original_raster is from the previous run of the model. new_raster is the developmental version. + tu.assert_make_test_arrays_and_difference(original_raster, new_raster, tile_id, test_input_pattern) + + pr.disable() + pr.print_stats() diff --git a/test/carbon_pools/test_deadwood_litter_equations.py b/test/carbon_pools/test_deadwood_litter_equations.py new file mode 100644 index 00000000..d118bc08 --- /dev/null +++ b/test/carbon_pools/test_deadwood_litter_equations.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +from carbon_pools.create_carbon_pools import create_deadwood_litter, deadwood_litter_equations + + + +def test_deadwood_litter_equations_can_be_called(): + result = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + +def test_deadwood_litter_equations_return_zero_deadwood_for_zero_biomass(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + assert deadwood == np.array([0.]) + +def test_deadwood_litter_equations_return_zero_litter_for_zero_biomass(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.zeros((1, 1), dtype='float32'), + deadwood_2000_output=np.zeros((1, 1), dtype='float32'), + elevation_window=np.zeros((1, 1), dtype='float32'), + litter_2000_output=np.zeros((1, 1), dtype='float32'), + natrl_forest_biomass_window=np.zeros((1, 1), dtype='float32'), + precip_window=np.zeros((1, 1), dtype='float32') + ) + assert litter == np.array([0.]) + + +# Scenario 1- tropical, low elevation, low precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_low_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([1], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([0.0094], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_low_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([1], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([0.0148], dtype='float32') + + +# Scenario 2- tropical, low elevation, moderate precipitation 
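+# The expected values in these scenario tests appear to encode fixed deadwood and litter fractions of aboveground
+# biomass multiplied by carbon fractions of roughly 0.47 (deadwood) and 0.37 (litter); that reading is inferred
+# from the asserted numbers rather than checked by the tests themselves. For this scenario (biomass = 100,
+# tropical, low elevation, 1600 mm precipitation) the apparent arithmetic is 100 * 0.01 * 0.47 = 0.47 for deadwood
+# carbon and 100 * 0.01 * 0.37 = 0.37 for litter carbon, matching the asserts below.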
+def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_mod_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1600], dtype='float32') + ) + assert deadwood == np.array([0.47], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_mod_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1600], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 3- tropical, low elevation, high precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_low_elev_high_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1601], dtype='float32') + ) + assert deadwood == np.array([2.82], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_low_elev_high_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1601], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 4- tropical, high elevation, any precipitation +def test_deadwood_litter_equations_return_zero_deadwood__tropical_high_elev_any_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([2001], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([3.29], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__tropical_high_elev_any_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([1], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([2001], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([0.37], dtype='float32') + + +# Scenario 5- non-tropical, any elevation, any precipitation +def test_deadwood_litter_equations_return_zero_deadwood__non_tropical_any_elev_any_precip(): + deadwood, _ = deadwood_litter_equations( + bor_tem_trop_window=np.array([2], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + 
elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert deadwood == np.array([3.76], dtype='float32') + +def test_deadwood_litter_equations_return_zero_litter__non_tropical_any_elev_any_precip(): + _, litter = deadwood_litter_equations( + bor_tem_trop_window=np.array([2], dtype='float32'), + deadwood_2000_output=np.array([0], dtype='float32'), + elevation_window=np.array([1], dtype='float32'), + litter_2000_output=np.array([0], dtype='float32'), + natrl_forest_biomass_window=np.array([100], dtype='float32'), + precip_window=np.array([1], dtype='float32') + ) + assert litter == np.array([1.48], dtype='float32') diff --git a/test/carbon_pools/test_deadwood_litter_rasterio.py b/test/carbon_pools/test_deadwood_litter_rasterio.py new file mode 100644 index 00000000..af763997 --- /dev/null +++ b/test/carbon_pools/test_deadwood_litter_rasterio.py @@ -0,0 +1,108 @@ +import cProfile +import glob +import numpy as np +import os +import pytest +import rasterio +import sys +import universal_util as uu +import constants_and_names as cn +from unittest.mock import patch +from carbon_pools.create_carbon_pools import create_deadwood_litter + +import test.test_utilities as tu + + +# run from /usr/local/app +# pytest -m rasterio -s +# pytest -m deadwood_litter -s +# Good test coordinates in GIS are -0.0002 S, 9.549 E (has two mangrove loss pixels adjacent to a few non-mangrove loss pixels) + +@pytest.mark.xfail +@patch("universal_util.sensit_tile_rename") +@patch("universal_util.sensit_tile_rename_biomass") +@patch("universal_util.make_tile_name") +@patch("universal_util.upload_log") +@pytest.mark.rasterio +@pytest.mark.deadwood_litter +@pytest.mark.parametrize("comparison_dict", [{cn.deadwood_emis_year_2000_dir: cn.pattern_deadwood_emis_year_2000} + ,{cn.litter_emis_year_2000_dir: cn.pattern_litter_emis_year_2000} + ]) + +def test_rasterio_runs(upload_log_dummy, make_tile_name_fake, sensit_tile_rename_biomass_fake, sensit_tile_rename_fake, + delete_old_outputs, create_deadwood_dictionary, create_litter_dictionary, comparison_dict): + + # # cProfile profiler + # pr=cProfile.Profile() + # pr.enable() + + ### arrange + # tile_id for testing and the extent that should be tested within it + tile_id = "00N_000E" + xmin = 0 + ymin = -0.005 + xmax = 10 + ymax = 0 + + # Dictionary of tiles needed for test + input_dict = {cn.mangrove_biomass_2000_dir: cn.pattern_mangrove_biomass_2000, + cn.cont_eco_dir: cn.pattern_cont_eco_processed, + cn.precip_processed_dir: cn.pattern_precip, + cn.elevation_processed_dir: cn.pattern_elevation, + cn.bor_tem_trop_processed_dir: cn.pattern_bor_tem_trop_processed, + cn.WHRC_biomass_2000_unmasked_dir: cn.pattern_WHRC_biomass_2000_unmasked, + cn.AGC_emis_year_dir: cn.pattern_AGC_emis_year} + + # Makes input tiles for process being tested in specified test area + tu.make_test_tiles(tile_id, input_dict, cn.pattern_test_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + test_input_pattern = list(comparison_dict.values())[0] + + # Makes comparison tiles for output in specified test area + tu.make_test_tiles(tile_id, comparison_dict, cn.pattern_comparison_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + # Deletes outputs of previous run if they exist. 
+ # Only runs before first parametrized run to avoid deleting the difference raster created from previous parametrizations + print(delete_old_outputs) + + # Makes mangrove deadwood:AGC and litter:AGC dictionaries for different continent-ecozone combinations + deadwood_dict = create_deadwood_dictionary + litter_dict = create_litter_dictionary + + # Renames the input test tiles with the test suffix (except for biomass, which has its own rule) + def fake_impl_sensit_tile_rename(sensit_type, tile_id, raw_pattern): + return f"test/test_data/{tile_id}_{raw_pattern}_{cn.pattern_test_suffix}.tif" + sensit_tile_rename_fake.side_effect = fake_impl_sensit_tile_rename + + # Renames the input biomass tile with the test suffix + def fake_impl_sensit_tile_rename_biomass(sensit_type, tile_id): + return f"test/test_data/{tile_id}_t_aboveground_biomass_ha_2000_{cn.pattern_test_suffix}.tif" + sensit_tile_rename_biomass_fake.side_effect = fake_impl_sensit_tile_rename_biomass + + # Makes the output tile names with the test suffix + def fake_impl_make_tile_name(tile_id, out_pattern): + return f"test/test_data/tmp_out/{tile_id}_{out_pattern}_{cn.pattern_test_suffix}.tif" + make_tile_name_fake.side_effect = fake_impl_make_tile_name + + ### act + # Creates the fragment output tiles + create_deadwood_litter(tile_id=tile_id, + mang_deadwood_AGB_ratio=deadwood_dict, + mang_litter_AGB_ratio=litter_dict, + carbon_pool_extent=['loss']) + + + ### assert + # The original and new rasters that need to be compared + original_raster = f'{cn.test_data_dir}{tile_id}_{test_input_pattern}_{cn.pattern_comparison_suffix}.tif' + # original_raster = f'{cn.test_data_dir}{tile_id}_{cn.pattern_deadwood_emis_year_2000}_{cn.pattern_comparison_suffix}.tif' # For forcing failure of litter test (compares litter to deadwood) + new_raster = f'{cn.test_data_out_dir}{tile_id}_{test_input_pattern}_{cn.pattern_test_suffix}.tif' + # new_raster = f'{cn.test_data_out_dir}{tile_id}_{cn.pattern_litter_emis_year_2000}_{cn.pattern_test_suffix}.tif' # For forcing failure of deadwood test (compares deadwood to litter) + + # # Converts the original and new rasters into numpy arrays for comparison. + # # Also creates a difference raster for visualization (not used in testing). + # # original_raster is from the previous run of the model. new_raster is the developmental version. + tu.assert_make_test_arrays_and_difference(original_raster, new_raster, tile_id, test_input_pattern) + + # pr.disable() + # pr.print_stats() diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 00000000..2736a8b3 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,73 @@ +import glob +import numpy as np +import os +import pytest +import rasterio +import universal_util as uu +import constants_and_names as cn + +# Deletes outputs of previous run if they exist. +# This fixture runs only before the first parametrized run, per https://stackoverflow.com/a/62288070. 
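+# Because the fixture is session-scoped, the cleanup below runs once per pytest invocation: a test requests it by
+# naming delete_old_outputs in its signature, and later parametrized cases in the same session keep the outputs
+# and difference rasters written by earlier cases.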
+@pytest.fixture(scope='session') +def delete_old_outputs(): + + out_tests = glob.glob(f'{cn.test_data_out_dir}*.tif') + for f in out_tests: + os.remove(f) + print(f"Deleted {f}") + + + +# Makes test tile fragments of specified size for testing purposes using vsis3 (rather than downloading full rasters to Docker instance) +def make_test_tiles(tile_id, input_dict, test_suffix, out_dir, xmin, ymin, xmax, ymax): + + # Iterates through all input files + for key, pattern in input_dict.items(): + + # Directory for vsis3 for input file + s3_dir = f'{key}'[5:] + vsis3_dir = f'/vsis3/{s3_dir}' + + # The full tile name and the test tile fragment name + in_file = f'{vsis3_dir}{tile_id}_{pattern}.tif' + out_file = f'{out_dir}{tile_id}_{pattern}_{test_suffix}.tif' + + # Skips creating the test tile fragment if it already exists + if os.path.exists(out_file): + uu.print_log(f'{out_file} already exists. Not creating.') + continue + + uu.print_log(f'Making test tile {out_file}') + + # Makes the test tile fragment + cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), + '-co', 'COMPRESS=DEFLATE', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', in_file, out_file] + uu.log_subprocess_output_full(cmd) + + +# Converts two rasters into numpy arrays, which can be compared in an assert statement. +# Also creates a raster that's the difference between the two compared rasters. Not used in assert statement. +# original_raster is from the previous run of the model. new_raster is the developmental version. +def assert_make_test_arrays_and_difference(original_raster, new_raster, tile_id, pattern): + + print(f'Comparing {new_raster} to {original_raster}') + + array_original = rasterio.open(original_raster).read() + array_new = rasterio.open(new_raster).read() + + # Array that is difference between the original and new rasters. Not used for testing, just for visualization. 
+ difference = array_original - array_new + + # Saves the difference raster + with rasterio.open(original_raster) as src: + dsm_meta = src.profile + + diff_saved = f'{cn.test_data_out_dir}{tile_id}_{pattern}_{cn.pattern_test_suffix}_difference.tif' + + with rasterio.open(diff_saved, 'w', **dsm_meta) as diff_out: + diff_out.write(difference) + + # https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_equal.html#numpy.testing.assert_equal + np.testing.assert_equal(array_original, array_new) + + print('\n') \ No newline at end of file diff --git a/test/removals/test_annual_removals_all_forest_types_rasterio.py b/test/removals/test_annual_removals_all_forest_types_rasterio.py new file mode 100644 index 00000000..1c61721a --- /dev/null +++ b/test/removals/test_annual_removals_all_forest_types_rasterio.py @@ -0,0 +1,125 @@ +import glob +import numpy as np +import os +import pytest +import rasterio +import sys +import universal_util as uu +import constants_and_names as cn +from unittest.mock import patch +from removals.annual_gain_rate_AGC_BGC_all_forest_types import annual_gain_rate_AGC_BGC_all_forest_types + +import test.test_utilities as tu + + +# run from /usr/local/app +# pytest -m all_removals -s + +# @pytest.mark.xfail +@patch("universal_util.sensit_tile_rename") +@patch("universal_util.make_tile_name") +@patch("universal_util.upload_log") +@pytest.mark.rasterio +@pytest.mark.all_removals +@pytest.mark.parametrize("comparison_dict", [ + {cn.removal_forest_type_dir: cn.pattern_removal_forest_type}, + {cn.annual_gain_AGC_all_types_dir: cn.pattern_annual_gain_AGC_all_types}, + {cn.annual_gain_BGC_all_types_dir: cn.pattern_annual_gain_BGC_all_types}, + {cn.annual_gain_AGC_BGC_all_types_dir: cn.pattern_annual_gain_AGC_BGC_all_types}, + {cn.stdev_annual_gain_AGC_all_types_dir: cn.pattern_stdev_annual_gain_AGC_all_types} + ]) + +def test_rasterio_runs(upload_log_dummy, make_tile_name_fake, sensit_tile_rename_fake, + delete_old_outputs, comparison_dict): + + ### arrange + # # tile_id for testing and the extent that should be tested within it + + # # For 40N_020E, AGC changes with using BGB:AGB map because European removal factor tiles are AGC+BGC, + # # so making the composite AGC tiles from that depends on the BGC ratio. 40N_020E seems to work fine. + # tile_id = "40N_020E" + # xmin = 20 + # ymax = 40 + # xmax = xmin + 10 + # ymin = ymax - 0.005 + + # For 40N_090W, AGC changes with using BGB:AGB map because US removal factor tiles are AGC+BGC, + # so making the composite AGC tiles from that depends on the BGC ratio. 40N_090W seems to work fine. 
+ tile_id = "40N_090W" + xmin = -90 + ymax = 40 + xmax = xmin + 10 + ymin = ymax - 0.005 + + # tile_id = "00N_000E" + # xmin = 0 + # ymax = 0 + # xmax = 10 + # ymin = -0.005 + + + # Dictionary of tiles needed for test + input_dict = { + cn.model_extent_dir: cn.pattern_model_extent, + cn.annual_gain_AGB_mangrove_dir: cn.pattern_annual_gain_AGB_mangrove, + cn.annual_gain_BGB_mangrove_dir: cn.pattern_annual_gain_BGB_mangrove, + cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, + cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked, + cn.annual_gain_AGC_BGC_natrl_forest_US_dir: cn.pattern_annual_gain_AGC_BGC_natrl_forest_US, + cn.annual_gain_AGC_natrl_forest_young_dir: cn.pattern_annual_gain_AGC_natrl_forest_young, + cn.age_cat_IPCC_dir: cn.pattern_age_cat_IPCC, + cn.annual_gain_AGB_IPCC_defaults_dir: cn.pattern_annual_gain_AGB_IPCC_defaults, + cn.BGB_AGB_ratio_dir: cn.pattern_BGB_AGB_ratio, + + cn.stdev_annual_gain_AGB_mangrove_dir: cn.pattern_stdev_annual_gain_AGB_mangrove, + cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe, + cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked, + cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US, + cn.stdev_annual_gain_AGC_natrl_forest_young_dir: cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, + cn.stdev_annual_gain_AGB_IPCC_defaults_dir: cn.pattern_stdev_annual_gain_AGB_IPCC_defaults + } + + output_pattern_list = [cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types, + cn.pattern_annual_gain_BGC_all_types, cn.pattern_annual_gain_AGC_BGC_all_types, + cn.pattern_stdev_annual_gain_AGC_all_types] + + # Makes input tiles for process being tested in specified test area + tu.make_test_tiles(tile_id, input_dict, cn.pattern_test_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + test_input_pattern = list(comparison_dict.values())[0] + + # Makes comparison tiles for output in specified test area + uu.print_log("Making comparison tile for output tile type") + tu.make_test_tiles(tile_id, comparison_dict, cn.pattern_comparison_suffix, cn.test_data_dir, xmin, ymin, xmax, ymax) + + # Deletes outputs of previous run if they exist. 
+ # Only runs before first parametrized run to avoid deleting the difference raster created from previous parametrizations + print(delete_old_outputs) + + # Renames the input test tiles with the test suffix (except for biomass, which has its own rule) + def fake_impl_sensit_tile_rename(sensit_type, tile_id, raw_pattern): + return f"test/test_data/{tile_id}_{raw_pattern}_{cn.pattern_test_suffix}.tif" + sensit_tile_rename_fake.side_effect = fake_impl_sensit_tile_rename + + # Makes the output tile names with the test suffix + def fake_impl_make_tile_name(tile_id, out_pattern): + return f"test/test_data/tmp_out/{tile_id}_{out_pattern}_{cn.pattern_test_suffix}.tif" + make_tile_name_fake.side_effect = fake_impl_make_tile_name + + ### act + # Creates the fragment output tiles + annual_gain_rate_AGC_BGC_all_forest_types(tile_id=tile_id, + output_pattern_list = output_pattern_list) + + + ### assert + # The original and new rasters that need to be compared + original_raster = f'{cn.test_data_dir}{tile_id}_{test_input_pattern}_{cn.pattern_comparison_suffix}.tif' + # original_raster = f'{cn.test_data_dir}{tile_id}_{cn.pattern_deadwood_emis_year_2000}_{cn.pattern_comparison_suffix}.tif' # For forcing failure of litter test (compares litter to deadwood) + new_raster = f'{cn.test_data_out_dir}{tile_id}_{test_input_pattern}_{cn.pattern_test_suffix}.tif' + # new_raster = f'{cn.test_data_out_dir}{tile_id}_{cn.pattern_litter_emis_year_2000}_{cn.pattern_test_suffix}.tif' # For forcing failure of deadwood test (compares deadwood to litter) + + # # Converts the original and new rasters into numpy arrays for comparison. + # # Also creates a difference raster for visualization (not used in testing). + # # original_raster is from the previous run of the model. new_raster is the developmental version. 
+ tu.assert_make_test_arrays_and_difference(original_raster, new_raster, tile_id, test_input_pattern) diff --git a/test/test_data/00N_000E_Mg_AGC_ha_emis_year_top_005deg.tif b/test/test_data/00N_000E_Mg_AGC_ha_emis_year_top_005deg.tif new file mode 100644 index 00000000..c12a9a1b Binary files /dev/null and b/test/test_data/00N_000E_Mg_AGC_ha_emis_year_top_005deg.tif differ diff --git a/test/test_data/00N_000E_elevation_top_005deg.tif b/test/test_data/00N_000E_elevation_top_005deg.tif new file mode 100644 index 00000000..bd0d916a Binary files /dev/null and b/test/test_data/00N_000E_elevation_top_005deg.tif differ diff --git a/test/test_data/00N_000E_fao_ecozones_bor_tem_tro_processed_top_005deg.tif b/test/test_data/00N_000E_fao_ecozones_bor_tem_tro_processed_top_005deg.tif new file mode 100644 index 00000000..11eaa67e Binary files /dev/null and b/test/test_data/00N_000E_fao_ecozones_bor_tem_tro_processed_top_005deg.tif differ diff --git a/test/test_data/00N_000E_fao_ecozones_continents_processed_top_005deg.tif b/test/test_data/00N_000E_fao_ecozones_continents_processed_top_005deg.tif new file mode 100644 index 00000000..32890461 Binary files /dev/null and b/test/test_data/00N_000E_fao_ecozones_continents_processed_top_005deg.tif differ diff --git a/test/test_data/00N_000E_mangrove_agb_t_ha_2000_top_005deg.tif b/test/test_data/00N_000E_mangrove_agb_t_ha_2000_top_005deg.tif new file mode 100644 index 00000000..8bf16b1f Binary files /dev/null and b/test/test_data/00N_000E_mangrove_agb_t_ha_2000_top_005deg.tif differ diff --git a/test/test_data/00N_000E_precip_mm_annual_top_005deg.tif b/test/test_data/00N_000E_precip_mm_annual_top_005deg.tif new file mode 100644 index 00000000..570135a6 Binary files /dev/null and b/test/test_data/00N_000E_precip_mm_annual_top_005deg.tif differ diff --git a/test/test_data/00N_000E_t_aboveground_biomass_ha_2000_top_005deg.tif b/test/test_data/00N_000E_t_aboveground_biomass_ha_2000_top_005deg.tif new file mode 100644 index 00000000..eff3826b Binary files /dev/null and b/test/test_data/00N_000E_t_aboveground_biomass_ha_2000_top_005deg.tif differ diff --git a/test/test_utilities.py b/test/test_utilities.py new file mode 100644 index 00000000..b1d593bf --- /dev/null +++ b/test/test_utilities.py @@ -0,0 +1,62 @@ +import glob +import numpy as np +import os +import pytest +import rasterio +import universal_util as uu +import constants_and_names as cn + +# Makes test tile fragments of specified size for testing purposes using vsis3 (rather than downloading full rasters to Docker instance) +def make_test_tiles(tile_id, input_dict, test_suffix, out_dir, xmin, ymin, xmax, ymax): + + # Iterates through all input files + for key, pattern in input_dict.items(): + + # Directory for vsis3 for input file + s3_dir = f'{key}'[5:] + vsis3_dir = f'/vsis3/{s3_dir}' + + # The full tile name and the test tile fragment name + in_file = f'{vsis3_dir}{tile_id}_{pattern}.tif' + out_file = f'{out_dir}{tile_id}_{pattern}_{test_suffix}.tif' + + # Skips creating the test tile fragment if it already exists + if os.path.exists(out_file): + uu.print_log(f'{out_file} already exists. 
Not creating.') + continue + + uu.print_log(f'Making test tile {out_file}') + + # Makes the test tile fragment + cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), + '-co', 'COMPRESS=DEFLATE', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', in_file, out_file] + uu.log_subprocess_output_full(cmd) + + +# Converts two rasters into numpy arrays, which can be compared in an assert statement. +# Also creates a raster that's the difference between the two compared rasters. Not used in assert statement. +# original_raster is from the previous run of the model. new_raster is the developmental version. +def assert_make_test_arrays_and_difference(original_raster, new_raster, tile_id, pattern): + + print(f'Comparing {new_raster} to {original_raster}') + + array_original = rasterio.open(original_raster).read() + array_new = rasterio.open(new_raster).read() + + # Array that is difference between the original and new rasters. Not used for testing, just for visualization. + difference = array_original - array_new + + # Saves the difference raster + with rasterio.open(original_raster) as src: + dsm_meta = src.profile + + diff_saved = f'{cn.test_data_out_dir}{tile_id}_{pattern}_{cn.pattern_test_suffix}_difference.tif' + + with rasterio.open(diff_saved, 'w', **dsm_meta) as diff_out: + diff_out.write(difference) + + # https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_equal.html#numpy.testing.assert_equal + np.testing.assert_equal(array_original, array_new) + + print('\n') \ No newline at end of file diff --git a/universal_util.py b/universal_util.py index 3585dcf0..d58c545e 100644 --- a/universal_util.py +++ b/universal_util.py @@ -8,7 +8,7 @@ import logging import csv import psutil -from shutil import copyfile +from shutil import copyfile, move import os import multiprocessing from multiprocessing.pool import Pool @@ -39,10 +39,7 @@ def upload_log(): # Creates the log with a starting line -def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=None, - save_intermediates=None, stage_input=None, run_through=None, carbon_pool_extent=None, - emitted_pools=None, thresh=None, std_net_flux=None, - include_mangroves=None, include_us=None, log_note=None): +def initiate_log(tile_id_list): # For some reason, logging gets turned off when AWS credentials aren't provided. # This restores logging without AWS credentials. @@ -56,24 +53,29 @@ def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=N datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO) - logging.info("Log notes: {}".format(log_note)) - logging.info("Model version: {}".format(cn.version)) - logging.info("This is the start of the log for this model run. 
Below are the command line arguments for this run.") - logging.info("Sensitivity analysis type: {}".format(sensit_type)) - logging.info("Model stage argument: {}".format(stage_input)) - logging.info("Run model stages after the initial selected stage: {}".format(run_through)) - logging.info("Run date: {}".format(run_date)) - logging.info("Tile ID list: {}".format(tile_id_list)) - logging.info("Carbon emitted_pools to generate (optional): {}".format(carbon_pool_extent)) - logging.info("Emissions emitted_pools (optional): {}".format(emitted_pools)) - logging.info("TCD threshold for aggregated map (optional): {}".format(thresh)) - logging.info("Standard net flux for comparison with sensitivity analysis net flux (optional): {}".format(std_net_flux)) - logging.info("Include mangrove removal scripts in model run (optional): {}".format(include_mangroves)) - logging.info("Include US removal scripts in model run (optional): {}".format(include_us)) - logging.info("Do not upload anything to s3: {}".format(no_upload)) - logging.info("AWS credentials supplied: {}".format(check_aws_creds())) - logging.info("Save intermediate outputs: {}".format(save_intermediates)) - logging.info("AWS ec2 instance type and AMI ID:") + if cn.SENSIT_TYPE == 'std': + sensit_type = 'standard model' + else: + sensit_type = cn.SENSIT_TYPE + + logging.info(f'Log notes: {cn.LOG_NOTE}') + logging.info(f'Model version: {cn.version}') + logging.info(f'This is the start of the log for this model run. Below are the command line arguments for this run.') + logging.info(f'Sensitivity analysis type: {sensit_type}') + logging.info(f'Model stage argument: {cn.STAGE_INPUT}') + logging.info(f'Run model stages after the initial selected stage: {cn.RUN_THROUGH}') + logging.info(f'Run date: {cn.RUN_DATE}') + logging.info(f'Tile ID list: {tile_id_list}') + logging.info(f'Carbon emitted_pools to generate (optional): {cn.CARBON_POOL_EXTENT}') + logging.info(f'Emissions emitted_pools (optional): {cn.EMITTED_POOLS}') + logging.info(f'Standard net flux for comparison with sensitivity analysis net flux (optional): {cn.STD_NET_FLUX}') + logging.info(f'Include mangrove removal scripts in model run (optional): {cn.INCLUDE_MANGROVES}') + logging.info(f'Include US removal scripts in model run (optional): {cn.INCLUDE_US}') + logging.info(f'Do not upload anything to s3: {cn.NO_UPLOAD}') + logging.info(f'AWS credentials supplied: {check_aws_creds()}') + logging.info(f'Save intermediate outputs: {cn.SAVE_INTERMEDIATES}') + logging.info(f'Use single processor: {cn.SINGLE_PROCESSOR}') + logging.info(f'AWS ec2 instance type and AMI ID:') # https://stackoverflow.com/questions/13735051/how-to-capture-curl-output-to-a-file # https://stackoverflow.com/questions/625644/how-to-get-the-instance-id-from-within-an-ec2-instance @@ -90,27 +92,27 @@ def initiate_log(tile_id_list=None, sensit_type=None, run_date=None, no_upload=N type_file = open("instance_type.txt", "r") type_lines = type_file.readlines() for line in type_lines: - logging.info(" Instance type: {}".format(line.strip())) + logging.info(f' Instance type: {line.strip()}') ami_file = open("ami_id.txt", "r") ami_lines = ami_file.readlines() for line in ami_lines: - logging.info(" AMI ID: {}".format(line.strip())) + logging.info(f' AMI ID: {line.strip()}') os.remove("ami_id.txt") os.remove("instance_type.txt") except: - logging.info(" Not running on AWS ec2 instance") + logging.info(' Not running on AWS ec2 instance') - logging.info("Available processors: {}".format(cn.count) + "\n") + logging.info(f"Available 
processors: {cn.count}") # Suppresses logging from rasterio and botocore below ERROR level for the entire model logging.getLogger("rasterio").setLevel(logging.ERROR) # https://www.tutorialspoint.com/How-to-disable-logging-from-imported-modules-in-Python logging.getLogger("botocore").setLevel(logging.ERROR) # "Found credentials in environment variables." is logged by botocore: https://github.com/boto/botocore/issues/1841 # If no_upload flag is not activated, log is uploaded - if not no_upload: + if not cn.NO_UPLOAD: upload_log() @@ -138,7 +140,7 @@ def print_log(*args): # Logs fatal errors to the log txt, uploads to s3, and then terminates the program with an exception in the console -def exception_log(no_upload, *args): +def exception_log(*args): # Empty string full_statement = str(object='') @@ -151,7 +153,7 @@ def exception_log(no_upload, *args): logging.info(full_statement, stack_info=True) # If no_upload flag is not activated (by choice or by lack of AWS credentials), output is uploaded - if not no_upload: + if not cn.NO_UPLOAD: # Need to upload log before the exception stops the script upload_log() @@ -165,7 +167,7 @@ def exception_log(no_upload, *args): def log_subprocess_output(pipe): # Reads all the output into a string - for full_out in iter(pipe.readline, b''): # b'\n'-separated lines + for full_out in iter(pipe.readline, b''): # b"\n"-separated lines # Separates the string into an array, where each entry is one line of output line_array = full_out.splitlines() @@ -175,9 +177,6 @@ def log_subprocess_output(pipe): logging.info(line.decode("utf-8")) #https://stackoverflow.com/questions/37016946/remove-b-character-do-in-front-of-a-string-literal-in-python-3, answer by krock print(line.decode("utf-8")) - # logging.info("\n") - # print("\n") - # # After the subprocess finishes, the log is uploaded to s3. # # Having too many tiles finish running subprocesses at once can cause the upload to get overwhelmed and cause # # an error. So, I've commented out the log upload because it's not really necessary here. @@ -198,7 +197,7 @@ def log_subprocess_output_full(cmd): with pipe: # Reads all the output into a string - for full_out in iter(pipe.readline, b''): # b'\n'-separated lines + for full_out in iter(pipe.readline, b''): # b"\n"-separated lines # Separates the string into an array, where each entry is one line of output line_array = full_out.splitlines() @@ -210,8 +209,6 @@ def log_subprocess_output_full(cmd): print(line.decode( "utf-8")) # https://stackoverflow.com/questions/37016946/remove-b-character-do-in-front-of-a-string-literal-in-python-3, answer by krock - # logging.info("\n") - # print("\n") # # After the subprocess finishes, the log is uploaded to s3 # upload_log() @@ -236,8 +233,7 @@ def check_storage(): used_storage = df_output_lines[5][2] available_storage = df_output_lines[5][3] percent_storage_used = df_output_lines[5][4] - print_log("Storage used:", used_storage, "; Available storage:", available_storage, - "; Percent storage used:", percent_storage_used) + print_log(f'Storage used: {used_storage}; Available storage: {available_storage}; Percent storage used: {percent_storage_used}') # Obtains the absolute number of RAM gigabytes currently in use by the entire system (all processors). @@ -252,8 +248,8 @@ def check_memory(): print_log(f"Memory usage is: {round(used_memory,2)} GB out of {round(total_memory,2)} = {round(percent_memory,1)}% usage") if percent_memory > 99: - print_log("WARNING: MEMORY USAGE DANGEROUSLY HIGH! 
TERMINATING PROGRAM.") # Not sure if this is necessary - exception_log("EXCEPTION: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.") + print_log('WARNING: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.') # Not sure if this is necessary + exception_log('EXCEPTION: MEMORY USAGE DANGEROUSLY HIGH! TERMINATING PROGRAM.') # Not currently using because it shows 1 when using with multiprocessing @@ -292,7 +288,7 @@ def get_tile_type(tile_name): return tile_type -# Gets the tile id from the full tile name using a regular expression +# Gets the tile name from the full tile name using a regular expression def get_tile_name(tile): tile_name = os.path.split(tile)[1] @@ -308,6 +304,12 @@ def get_tile_dir(tile): return tile_dir +# Makes a complete tile name out of component tile id and pattern +def make_tile_name(tile_id, pattern): + + return f'{tile_id}_{pattern}.tif' + + # Lists the tiles in a folder in s3 def tile_list_s3(source, sensit_type='std'): @@ -320,7 +322,7 @@ def tile_list_s3(source, sensit_type='std'): else: new_source = source.replace('standard', sensit_type) - print_log('\n' + "Creating list of tiles in", new_source) + print_log("\n" + f'Creating list of tiles in {new_source}') ## For an s3 folder in a bucket using AWSCLI # Captures the list of the files in the folder @@ -338,8 +340,8 @@ def tile_list_s3(source, sensit_type='std'): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -354,7 +356,7 @@ def tile_list_s3(source, sensit_type='std'): # In case the change of directories to look for sensitivity versions yields an empty folder. # This could be done better by using boto3 to check the potential s3 folders for files upfront but I couldn't figure # out how to do that. 
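The comment above notes that checking the candidate s3 folders up front with boto3 never got worked out. As a hedged sketch only (not part of this patch), one way such a check might look, assuming boto3 is installed on the instance and the usual s3://bucket/prefix/ layout; the function name is illustrative:

    import boto3

    def s3_prefix_has_tifs(s3_path):
        """Returns True if an s3://bucket/prefix/ path contains at least one .tif object."""
        bucket, _, prefix = s3_path.replace('s3://', '', 1).partition('/')
        paginator = boto3.client('s3').get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            # Each page lists up to 1000 keys; stop at the first GeoTIFF found
            if any(obj['Key'].endswith('.tif') for obj in page.get('Contents', [])):
                return True
        return False

Calling something like this on the sensitivity-analysis folder before falling back to the standard folder would avoid the empty-folder retry handled below.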
- print_log('\n' + "Creating list of tiles in", source) + print_log("\n" + f'Creating list of tiles in {source}') ## For an s3 folder in a bucket using AWSCLI # Captures the list of the files in the folder @@ -372,8 +374,8 @@ def tile_list_s3(source, sensit_type='std'): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only tifs will be in the tile list if '.tif' in tile_name: @@ -401,8 +403,8 @@ def tile_list_spot_machine(source, pattern): # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, 'tiles.txt'), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] # Only files with the specified pattern will be in the tile list if pattern in tile_name: @@ -412,200 +414,81 @@ def tile_list_spot_machine(source, pattern): return file_list -# Creates a list of all tiles found in either two or three s3 folders and removes duplicates from the list -def create_combined_tile_list(set1, set2, set3=None, sensit_type='std'): +# Creates a list of all tile ids found in input s3 folders, removes duplicate tile ids from the list, and orders them +def create_combined_tile_list(list_of_tile_dirs, sensit_type='std'): - print_log("Making a combined tile list...") + print_log('Making a combined tile id list...') # Changes the directory to list tiles according to the model run. - # Ff the model run is the biomass_swap or US_removals sensitivity analyses + # If the model run is the biomass_swap or US_removals sensitivity analyses # (JPL AGB extent and US extent, respectively), particular sets of tiles are designated. - # If the sensitivity analysis is biomass_swap or US_removals, there's no need to merge tile lists because the tile - # list is defined by the extent of the sensitivity analysis. # If the model run is standard, the names don't change. - # If the model is any other sensitivity run, those tiles are used. + # WARNING: Other sensitivity analyses aren't included in this and may result in unintended behaviors. + # WARNING: No sensitivity analyses have been tested with this function. 
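For orientation, a hedged usage sketch of the reworked create_combined_tile_list(), which now takes a single list of s3 folders rather than two or three positional arguments; the folder constants below are assumed examples (they appear elsewhere in this patch), not a prescribed call:

    # Builds one ordered, de-duplicated tile id list from several s3 folders
    tile_id_list = uu.create_combined_tile_list(
        [cn.model_extent_dir, cn.annual_gain_AGB_mangrove_dir],  # illustrative folders
        sensit_type='std')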
if sensit_type == 'biomass_swap': source = cn.JPL_processed_dir tile_list = tile_list_s3(source, sensit_type='std') return tile_list - elif sensit_type == 'US_removals': + if sensit_type == 'US_removals': source = cn.annual_gain_AGC_BGC_natrl_forest_US_dir tile_list = tile_list_s3(source, sensit_type='std') return tile_list - elif sensit_type == 'std': - set1 = set1 - set2 = set2 - else: - set1 = set1.replace('standard', sensit_type) - set2 = set2.replace('standard', sensit_type) - - - # out = Popen(['aws', 's3', 'ls', set1, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) - out = Popen(['aws', 's3', 'ls', set1], stdout=PIPE, stderr=STDOUT) - stdout, stderr = out.communicate() - # Writes the output string to a text file for easier interpretation - set1_tiles = open("set1.txt", "wb") - set1_tiles.write(stdout) - set1_tiles.close() - - # out = Popen(['aws', 's3', 'ls', set2, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) - out = Popen(['aws', 's3', 'ls', set2], stdout=PIPE, stderr=STDOUT) - stdout2, stderr2 = out.communicate() - # Writes the output string to a text file for easier interpretation - set2_tiles = open("set2.txt", "wb") - set2_tiles.write(stdout2) - set2_tiles.close() - - # Empty lists for filling with biomass tile ids - file_list_set1 = [] - file_list_set2 = [] - # Iterates through the first text file to get the names of the tiles and appends them to list - with open("set1.txt", 'r') as tile: - - for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] - - # Only tifs will be in the tile list - if '.tif' in tile_name: - - tile_id = get_tile_id(tile_name) - file_list_set1.append(tile_id) - - # Iterates through the second text file to get the names of the tiles and appends them to list - with open("set2.txt", 'r') as tile: - - for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] - - # Only tifs will be in the tile list - if '.tif' in tile_name: - - tile_id = get_tile_id(tile_name) - file_list_set2.append(tile_id) - - if len(file_list_set1) > 1: - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set1), set1)) - else: - print_log("There are 0 tiles in {}. Looking for alternative tile set...".format(set1)) - set1 = set1.replace(sensit_type, 'standard') - print_log(" Looking for alternative tile set in {}".format(set1)) + # Iterates through the s3 locations and makes a txt file of tiles for each one + for i, tile_set in enumerate(list_of_tile_dirs): # out = Popen(['aws', 's3', 'ls', set1, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) - out = Popen(['aws', 's3', 'ls', set1], stdout=PIPE, stderr=STDOUT) + out = Popen(['aws', 's3', 'ls', tile_set], stdout=PIPE, stderr=STDOUT) stdout, stderr = out.communicate() # Writes the output string to a text file for easier interpretation - set1_tiles = open("set1.txt", "wb") + set1_tiles = open(f'tile_set_{i}.txt', "wb") set1_tiles.write(stdout) set1_tiles.close() - # Empty lists for filling with biomass tile ids - file_list_set1 = [] - - # Iterates through the first text file to get the names of the tiles and appends them to list - with open("set1.txt", 'r') as tile: - - for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] - - # Only tifs will be in the tile list - if '.tif' in tile_name: - tile_id = get_tile_id(tile_name) - file_list_set1.append(tile_id) - - print_log("There are {} tiles in {}. 
Using this tile set.".format(len(file_list_set1), set1)) - - if len(file_list_set2) > 1: - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set2), set2)) - else: - print_log("There are 0 tiles in {}. Looking for alternative tile set.".format(set2)) - set2 = set2.replace(sensit_type, 'standard') - print_log(" Looking for alternative tile set in {}".format(set2)) - - # out = Popen(['aws', 's3', 'ls', set2, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) - out = Popen(['aws', 's3', 'ls', set2], stdout=PIPE, stderr=STDOUT) - stdout2, stderr2 = out.communicate() - # Writes the output string to a text file for easier interpretation - set2_tiles = open("set2.txt", "wb") - set2_tiles.write(stdout2) - set2_tiles.close() - - file_list_set2 = [] - - # Iterates through the second text file to get the names of the tiles and appends them to list - with open("set2.txt", 'r') as tile: - - for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] - - # Only tifs will be in the tile list - if '.tif' in tile_name: - tile_id = get_tile_id(tile_name) - file_list_set2.append(tile_id) - - print_log("There are {} tiles in {}. Using this tile set.".format(len(file_list_set2), set2)) - - # If there's a third folder supplied, iterates through that - if set3 != None: - - print_log("Third set of tiles input. Adding to first two sets of tiles...") - - if sensit_type == 'std': - set3 = set3 - else: - set3 = set3.replace('standard', sensit_type) - - # out = Popen(['aws', 's3', 'ls', set3, '--no-sign-request'], stdout=PIPE, stderr=STDOUT) - out = Popen(['aws', 's3', 'ls', set3], stdout=PIPE, stderr=STDOUT) - stdout3, stderr3 = out.communicate() - # Writes the output string to a text file for easier interpretation - set3_tiles = open("set3.txt", "wb") - set3_tiles.write(stdout3) - set3_tiles.close() + # Empty lists for filling with tile ids + file_list_set = [] - file_list_set3 = [] + # The list of text files with tile info from s3 + tile_set_txt_list = glob.glob('tile_set_*txt') - # Iterates through the text file to get the names of the tiles and appends them to list - with open("set3.txt", 'r') as tile: + # Combines all tile text files into a single tile text file + # https://stackoverflow.com/a/13613375 + with open('tile_set_consolidated.txt', 'w') as outfile: + for fname in tile_set_txt_list: + with open(fname) as infile: + outfile.write(infile.read()) - for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + # Iterates through the rows of the consolidated text file to get the tile ids and appends them to the list + with open('tile_set_consolidated.txt', 'r') as tile: - # Only tifs will be in the tile list - if '.tif' in tile_name: - tile_id = get_tile_id(tile_name) - file_list_set3.append(tile_id) - - print_log("There are {} tiles in {}".format(len(file_list_set3), set3)) + for line in tile: - # Combines both tile lists - all_tiles = file_list_set1 + file_list_set2 + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] - # If a third directory is supplied, the tiles from that list are added to the list from the first two - if set3 != None: + # Only tifs will be in the tile list + if '.tif' in tile_name: - all_tiles = all_tiles + file_list_set3 + tile_id = get_tile_id(tile_name) + file_list_set.append(tile_id) # Tile list with tiles found in multiple lists removed, so now duplicates are gone - unique_tiles = list(set(all_tiles)) + unique_tiles 
= list(set(file_list_set)) # Converts the set to a pandas dataframe to put the tiles in the correct order df = pd.DataFrame(unique_tiles, columns=['tile_id']) df = df.sort_values(by=['tile_id']) - # Converts the pandas dataframe to a Python list + # Converts the pandas dataframe back to a Python list unique_tiles_ordered_list = df.tile_id.tolist() # Removes the text files with the lists of tiles - set_txt = glob.glob("set*.txt") - for i in set_txt: + tile_set_txt_list = glob.glob('tile_set_*txt') # Adds the consolidated tile txt to the list + for i in tile_set_txt_list: os.remove(i) + print_log(f'There are {len(unique_tiles_ordered_list)} unique tiles in {len(list_of_tile_dirs)} s3 folders ({len(file_list_set)} tiles overall)') + return unique_tiles_ordered_list @@ -626,16 +509,19 @@ def count_tiles_s3(source, pattern=None): file_list = [] + if source == cn.gain_dir: + print_log("Not counting gain tiles... No good mechanism for it, sadly.") + return + # Iterates through the text file to get the names of the tiles and appends them to list with open(os.path.join(cn.docker_tmp, tile_list_name), 'r') as tile: for line in tile: - num = len(line.strip('\n').split(" ")) - tile_name = line.strip('\n').split(" ")[num - 1] + num = len(line.strip("\n").split(" ")) + tile_name = line.strip("\n").split(" ")[num - 1] - # For gain, tcd, pixel area, and loss tiles (and their rewindowed versions), + # For tcd, pixel area, and loss tiles (and their rewindowed versions), # which have the tile_id after the the pattern - if pattern in [cn.pattern_gain, cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss, - cn.pattern_gain_rewindow, cn.pattern_tcd_rewindow, cn.pattern_pixel_area_rewindow]: + if pattern in [cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss]: if tile_name.endswith('.tif'): tile_id = get_tile_id(tile_name) file_list.append(tile_id) @@ -656,7 +542,6 @@ def count_tiles_s3(source, pattern=None): return len(file_list) - # Gets the bounding coordinates of a tile def coords(tile_id): NS = tile_id.split("_")[0][-1:] @@ -691,11 +576,12 @@ def s3_flexible_download(source_dir, pattern, dest, sensit_type, tile_id_list): # Creates a full download name (path and file) for tile_id in tile_id_list: - if pattern in [cn.pattern_gain, cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss, - cn.pattern_gain_rewindow, cn.pattern_tcd_rewindow, cn.pattern_pixel_area_rewindow]: # For tiles that do not have the tile_id first - source = '{0}{1}_{2}.tif'.format(source_dir, pattern, tile_id) + if pattern in [cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss]: # For tiles that do not have the tile_id first + source = f'{source_dir}{pattern}_{tile_id}.tif' + elif pattern in [cn.pattern_gain_data_lake]: + source = f'{source_dir}{tile_id}.tif' else: # For every other type of tile - source = '{0}{1}_{2}.tif'.format(source_dir, tile_id, pattern) + source = f'{source_dir}{tile_id}_{pattern}.tif' s3_file_download(source, dest, sensit_type) @@ -712,14 +598,17 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # The number of tiles with the given pattern on the spot machine. # Special cases are below. 
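As a side note on the tile-naming conventions that s3_flexible_download() above switches between, a small illustrative sketch with a hypothetical pattern string (the real pattern constants live in constants_and_names):

    tile_id = '00N_000E'
    pattern = 'some_pattern'                 # hypothetical pattern
    print(f'{pattern}_{tile_id}.tif')        # pattern-first: tcd, pixel area, loss
    print(f'{tile_id}.tif')                  # bare id: tree cover gain on gfw-data-lake
    print(f'{tile_id}_{pattern}.tif')        # id-first: all other tile types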
- local_tile_count = len(glob.glob('*{}*.tif'.format(pattern))) + local_tile_count = len(glob.glob(f'*{pattern}*.tif')) - # For tile types that have the tile_id after the pattern - if pattern in [cn.pattern_gain, cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss]: + # For gain tiles, which have a different pattern on the ec2 instance from s3 + if source == cn.gain_dir: + local_tile_count = len(glob.glob(f'*{cn.pattern_gain_ec2}*.tif')) - local_tile_count = len(glob.glob('{}*.tif'.format(pattern))) + # For tile types that have the tile_id after the pattern + if pattern in [cn.pattern_tcd, cn.pattern_pixel_area, cn.pattern_loss]: + local_tile_count = len(glob.glob(f'{pattern}*.tif')) - print_log("There are", local_tile_count, "tiles on the spot machine with the pattern", pattern) + print_log(f'There are {local_tile_count} tiles on the spot machine with the pattern {pattern}') # Changes the path to download from based on the sensitivity analysis being run and whether that particular input # has a sensitivity analysis path on s3 @@ -728,15 +617,15 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # Creates the appropriate path for getting sensitivity analysis tiles source_sens = source.replace('standard', sensit_type) - print_log("Attempting to change source directory {0} to {1} to reflect sensitivity analysis".format(source, source_sens)) + print_log(f'Attempting to change source directory {source} to {source_sens} to reflect sensitivity analysis') # Counts how many tiles are in the sensitivity analysis source s3 folder s3_count_sens = count_tiles_s3(source_sens) - print_log("There are", s3_count_sens, "tiles in sensitivity analysis folder", source_sens, "with the pattern", pattern) + print_log(f'There are {s3_count_sens} tiles in sensitivity analysis folder {source_sens} with the pattern {pattern}') # Counts how many tiles are in the standard model source s3 folder s3_count_std = count_tiles_s3(source) - print_log("There are", s3_count_std, "tiles in standard model folder", source, "with the pattern", pattern) + print_log(f'There are {s3_count_std} tiles in standard model folder {source} with the pattern {pattern}') # Decides which source folder to use the count from: standard model or sensitivity analysis. # If there are sensitivity analysis tiles, that source folder should be used. @@ -750,22 +639,22 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # If there are as many tiles on the spot machine with the relevant pattern as there are on s3, no tiles are downloaded if local_tile_count == s3_count: - print_log("Tiles with pattern", pattern, "are already on spot machine. Not downloading.", '\n') + print_log(f'Tiles with pattern {pattern} are already on spot machine. 
Not downloading.', "\n") return # If there appears to be a full set of tiles in the sensitivity analysis folder (7 is semi arbitrary), # the sensitivity folder is downloaded if s3_count > 7: - print_log("Source directory used:", source_final) + print_log(f'Source directory used: {source_final}') - cmd = ['aws', 's3', 'cp', source_final, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] - # cmd = ['aws', 's3', 'cp', source_final, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv'] + cmd = ['aws', 's3', 'cp', source_final, dest, '--no-sign-request', '--exclude', '*tiled/*', + '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress', '--recursive'] + # cmd = ['aws', 's3', 'cp', source_final, dest, '--no-sign-request', '--exclude', '*tiled/*', + # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--recursive'] log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # If there are fewer than 7 files in the sensitivity folder (i.e., either folder doesn't exist or it just has # a few test tiles), the standard folder is downloaded. @@ -773,38 +662,69 @@ def s3_folder_download(source, dest, sensit_type, pattern = None): # for this date. else: - print_log("Source directory used:", source) + print_log(f'Source directory used: {source}') - cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] - # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv'] + cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--exclude', '*tiled/*', + '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress', '--recursive'] + # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--exclude', '*tiled/*', + # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--recursive'] log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # For the standard model, the standard folder is downloaded. else: # Counts how many tiles are in the source s3 folder s3_count = count_tiles_s3(source, pattern=pattern) - print_log("There are", s3_count, "tiles at", source, "with the pattern", pattern) + print_log(f'There are {s3_count} tiles at {source} with the pattern {pattern}') # If there are as many tiles on the spot machine with the relevant pattern as there are on s3, no tiles are downloaded if local_tile_count == s3_count: - print_log("Tiles with pattern", pattern, "are already on spot machine. Not downloading.", '\n') + print_log(f'Tiles with pattern {pattern} are already on spot machine. Not downloading.', "\n") return - print_log("Tiles with pattern", pattern, "are not on spot machine. Downloading...") + print_log(f'Tiles with pattern {pattern} are not on spot machine. 
Downloading...') - cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress'] - # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--recursive', '--exclude', '*tiled/*', - # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv'] + # Downloads tile sets from the gfw-data-lake. + # They need a special process because they don't have a tile pattern on the data-lake, + # so I have to download them into their own folder and then give them a pattern while moving them to the main folder + if 'gfw-data-lake' in source: - log_subprocess_output_full(cmd) + # Deletes special folder for downloads from data-lake (if it already exists) + if os.path.exists(os.path.join(dest, 'data-lake-downloads')): + os.rmdir(os.path.join(dest, 'data-lake-downloads')) + + # Special folder for the tile set that doesn't have a pattern when downloaded + os.mkdir(os.path.join(dest, 'data-lake-downloads')) + + cmd = ['aws', 's3', 'cp', source, os.path.join(dest, 'data-lake-downloads'), + '--request-payer', 'requester', '--exclude', '*xml', + '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress', '--recursive'] + log_subprocess_output_full(cmd) + + # Copies pattern-less tiles from their special folder to main tile folder and renames them with + # pattern along the way + print_log("Copying tiles to main tile folder...") + for filename in os.listdir(os.path.join(dest, 'data-lake-downloads')): + move(os.path.join(dest, f'data-lake-downloads/{filename}'), + os.path.join(cn.docker_tile_dir, f'{filename[:-4]}_{cn.pattern_gain_ec2}.tif')) + + # Deletes special folder for downloads from data-lake + os.rmdir(os.path.join(dest, 'data-lake-downloads')) + print_log("Tree cover gain tiles copied to main tile folder...") + + # Downloads non-data-lake inputs + else: + + cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--exclude', '*tiled/*', + '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--no-progress', '--recursive'] + # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--exclude', '*tiled/*', + # '--exclude', '*geojason', '--exclude', '*vrt', '--exclude', '*csv', '--recursive'] + + log_subprocess_output_full(cmd) - print_log('\n') + print_log("\n") # Downloads individual tiles from s3 @@ -817,6 +737,11 @@ def s3_file_download(source, dest, sensit_type): dir = get_tile_dir(source) file_name = get_tile_name(source) + try: + tile_id = get_tile_id(file_name) + except: + pass + # Changes the file to download based on the sensitivity analysis being run and whether that particular input # has a sensitivity analysis path on s3. 
# Files that have standard and sensitivity analysis variants are handled differently from ones without variants @@ -832,13 +757,13 @@ def s3_file_download(source, dest, sensit_type): file_name_sens = file_name[:-4] + '_' + sensit_type + '.tif' # Doesn't download the tile if sensitivity version is already on the spot machine - print_log("Option 1: Checking if {} is already on spot machine...".format(file_name_sens)) + print_log(f'Option 1: Checking if {file_name_sens} is already on spot machine...') if os.path.exists(file_name_sens): - print_log(" Option 1 success:", file_name_sens, "already downloaded", "\n") + print_log(f' Option 1 success: {file_name_sens} already downloaded', "\n") return else: - print_log(" Option 1 failure: {0} is not already on spot machine.".format(file_name_sens)) - print_log("Option 2: Checking for sensitivity analysis tile {0}/{1} on s3...".format(dir_sens[15:], file_name_sens)) + print_log(f' Option 1 failure: {file_name_sens} is not already on spot machine.') + print_log(f'Option 2: Checking for sensitivity analysis tile {dir_sens[15:]}/{file_name_sens} on s3...') # If not already downloaded, first tries to download the sensitivity analysis version # cmd = ['aws', 's3', 'cp', '{0}/{1}'.format(dir_sens, file_name_sens), dest, '--no-sign-request', '--only-show-errors'] @@ -846,22 +771,22 @@ def s3_file_download(source, dest, sensit_type): log_subprocess_output_full(cmd) if os.path.exists(file_name_sens): - print_log(" Option 2 success: Sensitivity analysis tile {0}/{1} found on s3 and downloaded".format(dir_sens, file_name_sens), "\n") + print_log(f' Option 2 success: Sensitivity analysis tile {dir_sens}/{file_name_sens} found on s3 and downloaded', "\n") return else: - print_log(" Option 2 failure: Tile {0}/{1} not found on s3. Looking for standard model source...".format(dir_sens, file_name_sens)) + print_log(f' Option 2 failure: Tile {dir_sens}/{file_name_sens} not found on s3. Looking for standard model source...') # Next option is to use standard version of tile if on spot machine. # This can happen despite it being a sensitivity run because this input file doesn't have a sensitivity version # for this date. - print_log("Option 3: Checking if standard version {} is already on spot machine...".format(file_name)) + print_log(f'Option 3: Checking if standard version {file_name} is already on spot machine...') if os.path.exists(file_name): - print_log(" Option 3 success:", file_name, "already downloaded", "\n") + print_log(f' Option 3 success: {file_name} already downloaded', "\n") return else: - print_log(" Option 3 failure: {} is not already on spot machine. ".format(file_name)) - print_log("Option 4: Looking for standard version of {} to download...".format(file_name)) + print_log(f' Option 3 failure: {file_name} is not already on spot machine. ') + print_log(f'Option 4: Looking for standard version of {file_name} to download...') # If not already downloaded, final option is to try to download the standard version of the tile. # If this doesn't work, the script throws a fatal error because no variant of this tile was found. @@ -870,46 +795,75 @@ def s3_file_download(source, dest, sensit_type): log_subprocess_output_full(cmd) if os.path.exists(file_name): - print_log(" Option 4 success: Standard tile {} found on s3 and downloaded".format(source), "\n") + print_log(f' Option 4 success: Standard tile {source} found on s3 and downloaded', "\n") return else: - print_log(" Option 4 failure: Tile {0} not found on s3. Tile not found but it seems it should be. 
Check file paths and names.".format(source), "\n") + print_log(f' Option 4 failure: Tile {source} not found on s3. Tile not found but it seems it should be. Check file paths and names.', "\n") # If not a sensitivity run or a tile type without sensitivity analysis variants, the standard file is downloaded + + # Special download procedures for tree cover gain because the tiles have no pattern, just an ID. + # Tree cover gain tiles are renamed as their downloaded to get a pattern added to them. else: - print_log("Option 1: Checking if {} is already on spot machine...".format(file_name)) - if os.path.exists(os.path.join(dest, file_name)): - print_log(" Option 1 success:", os.path.join(dest, file_name), "already downloaded", "\n") - return + if dir == cn.gain_dir[:-1]: # Delete last character of gain_dir because it has the terminal / while dir does not have terminal / + ec2_file_name = f'{tile_id}_{cn.pattern_gain_ec2}.tif' + print_log(f'Option 1: Checking if {ec2_file_name} is already on spot machine...') + if os.path.exists(os.path.join(dest, ec2_file_name)): + print_log(f' Option 1 success: {os.path.join(dest, ec2_file_name)} already downloaded', "\n") + return + else: + print_log(f' Option 1 failure: {ec2_file_name} is not already on spot machine.') + print_log(f'Option 2: Checking for tile {source} on s3...') + + # If the tile isn't already downloaded, download is attempted + source = os.path.join(dir, file_name) + + # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--only-show-errors'] + cmd = ['aws', 's3', 'cp', source, f'{dest}{ec2_file_name}', + '--request-payer', 'requester', '--only-show-errors'] + log_subprocess_output_full(cmd) + if os.path.exists(os.path.join(dest, ec2_file_name)): + print_log(f' Option 2 success: Tile {source} found on s3 and downloaded', "\n") + return + else: + print_log( + f' Option 2 failure: Tile {source} not found on s3. Tile not found but it seems it should be. Check file paths and names.', "\n") + + # All other tiles besides tree cover gain else: - print_log(" Option 1 failure: {0} is not already on spot machine.".format(file_name)) - print_log("Option 2: Checking for tile {} on s3...".format(source)) - - - # If the tile isn't already downloaded, download is attempted - source = os.path.join(dir, file_name) - - # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--only-show-errors'] - cmd = ['aws', 's3', 'cp', source, dest, '--only-show-errors'] - log_subprocess_output_full(cmd) + print_log(f'Option 1: Checking if {file_name} is already on spot machine...') if os.path.exists(os.path.join(dest, file_name)): - print_log(" Option 2 success: Tile {} found on s3 and downloaded".format(source), "\n") + print_log(f' Option 1 success: {os.path.join(dest, file_name)} already downloaded', "\n") return else: - print_log(" Option 2 failure: Tile {} not found on s3. Tile not found but it seems it should be. 
Check file paths and names.".format(source), "\n") + print_log(f' Option 1 failure: {file_name} is not already on spot machine.') + print_log(f'Option 2: Checking for tile {source} on s3...') + + + # If the tile isn't already downloaded, download is attempted + source = os.path.join(dir, file_name) + + # cmd = ['aws', 's3', 'cp', source, dest, '--no-sign-request', '--only-show-errors'] + cmd = ['aws', 's3', 'cp', source, dest, '--only-show-errors'] + log_subprocess_output_full(cmd) + if os.path.exists(os.path.join(dest, file_name)): + print_log(f' Option 2 success: Tile {source} found on s3 and downloaded', "\n") + return + else: + print_log(f' Option 2 failure: Tile {source} not found on s3. Tile not found but it seems it should be. Check file paths and names.', "\n") # Uploads all tiles of a pattern to specified location def upload_final_set(upload_dir, pattern): - print_log("Uploading tiles with pattern {0} to {1}".format(pattern, upload_dir)) + print_log(f'Uploading tiles with pattern {pattern} to {upload_dir}') - cmd = ['aws', 's3', 'cp', cn.docker_base_dir, upload_dir, '--exclude', '*', '--include', '*{}*tif'.format(pattern), + cmd = ['aws', 's3', 'cp', cn.docker_tile_dir, upload_dir, '--exclude', '*', '--include', '*{}*tif'.format(pattern), '--recursive', '--no-progress'] try: log_subprocess_output_full(cmd) - print_log(" Upload of tiles with {} pattern complete!".format(pattern)) + print_log(f' Upload of tiles with {pattern} pattern complete!') except: - print_log("Error uploading output tile(s)") + print_log('Error uploading output tile(s)') # Uploads the log as each model output tile set is finished upload_log() @@ -927,7 +881,7 @@ def upload_final(upload_dir, tile_id, pattern): try: log_subprocess_output_full(cmd) except: - print_log("Error uploading output tile") + print_log('Error uploading output tile') # This version of checking for data is bad because it can miss tiles that have very little data in them. @@ -935,7 +889,7 @@ def upload_final(upload_dir, tile_id, pattern): # This method creates a tif.aux.xml file that I tried to add a line to delete but couldn't get to work. def check_and_delete_if_empty_light(tile_id, output_pattern): - tile_name = '{0}_{1}.tif'.format(tile_id, output_pattern) + tile_name = f'{tile_id}_{output_pattern}.tif' # Source: http://gis.stackexchange.com/questions/90726 # Opens raster and chooses band to find min, max @@ -945,9 +899,9 @@ def check_and_delete_if_empty_light(tile_id, output_pattern): print_log(" Tile stats = Minimum=%.3f, Maximum=%.3f, Mean=%.3f, StdDev=%.3f" % (stats[0], stats[1], stats[2], stats[3])) if stats[0] != 0: - print_log(" Data found in {}. Keeping file...".format(tile_name)) + print_log(f' Data found in {tile_name}. Keeping file...') else: - print_log(" No data found. Deleting {}...".format(tile_name)) + print_log(f' Data not found in {tile_name}. Deleting...') os.remove(tile_name) # Using this gdal data check method creates a tif.aux.xml file that is unnecessary. 
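The comments above mention a stray tif.aux.xml sidecar left behind by the statistics call that the author could not get a delete line working for. A minimal sketch of one way to clean it up after the stats check, assuming GDAL's usual <raster>.aux.xml sidecar naming; illustrative only, not part of this patch:

    import os

    def remove_stats_sidecar(tile_name):
        """Deletes the .aux.xml sidecar that GDAL writes when band statistics are computed, if present."""
        aux_xml = f'{tile_name}.aux.xml'
        if os.path.exists(aux_xml):
            os.remove(aux_xml)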
@@ -960,57 +914,57 @@ def check_for_data(tile): with rasterio.open(tile) as img: msk = img.read_masks(1).astype(bool) if msk[msk].size == 0: - # print_log("Tile {} is empty".format(tile)) + # print_log(f"Tile {tile} is empty") return True else: - # print_log("Tile {} is not empty".format(tile)) + # print_log(f"Tile {tile} is not empty") return False def check_and_delete_if_empty(tile_id, output_pattern): - tile_name = '{0}_{1}.tif'.format(tile_id, output_pattern) + tile_name = f'{tile_id}_{output_pattern}.tif' # Only checks for data if the tile exists if not os.path.exists(tile_name): - print_log(tile_name, "does not exist. Skipping check of whether there is data.") + print_log(f'{tile_name} does not exist. Skipping check of whether there is data.') return - print_log("Checking if {} contains any data...".format(tile_name)) + print_log(f'Checking if {tile_name} contains any data...') no_data = check_for_data(tile_name) if no_data: - print_log(" No data found in {}. Deleting tile...".format(tile_name)) + print_log(f' Data not found in {tile_name}. Deleting...') os.remove(tile_name) else: - print_log(" Data found in {}. Keeping tile to copy to s3...".format(tile_name)) + print_log(f' Data found in {tile_name}. Keeping tile to copy to s3...') # Checks if there's data in a tile and, if so, uploads it to s3 def check_and_upload(tile_id, upload_dir, pattern): - print_log("Checking if {} contains any data...".format(tile_id)) - out_tile = '{0}_{1}.tif'.format(tile_id, pattern) + print_log(f'Checking if {tile_id} contains any data...') + out_tile = f'{tile_id}_{pattern}.tif' no_data = check_for_data(out_tile) if no_data: - print_log(" No data found. Not copying {}.".format(tile_id)) + print_log(f' Data not found in {tile_id}. Not copying to s3...') else: - print_log(" Data found in {}. Copying tile to s3...".format(tile_id)) + print_log(f' Data found in {tile_id}. 
Copying tile to s3...') upload_final(upload_dir, tile_id, pattern) - print_log(" Tile copied to s3") + print_log(' Tile copied to s3') # Prints the number of tiles that have been processed so far def count_completed_tiles(pattern): - completed = len(glob.glob1(cn.docker_base_dir, '*{}*'.format(pattern))) + completed = len(glob.glob1(cn.docker_tile_dir, '*{}*'.format(pattern))) - print_log("Number of completed or in-progress tiles:", completed) + print_log(f'Number of completed or in-progress tiles: {completed}') # Returns the NoData value of a raster @@ -1028,25 +982,25 @@ def get_raster_nodata_value(tile): # Prints information about the tile that was just processed: how long it took and how many tiles have been completed -def end_of_fx_summary(start, tile_id, pattern, no_upload): +def end_of_fx_summary(start, tile_id, pattern): # Checking memory at this point (end of the function) seems to record memory usage when it is at its peak check_memory() end = datetime.datetime.now() elapsed_time = end-start - print_log("Processing time for tile", tile_id, ":", elapsed_time) + print_log(f'Processing time for tile {tile_id}: {elapsed_time}') count_completed_tiles(pattern) # If no_upload flag is not activated, log is uploaded - if not no_upload: + if not cn.NO_UPLOAD: # Uploads the log as each tile is finished upload_log() # Warps raster to Hansen tiles using multiple processors -def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): +def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt): # Start time start = datetime.datetime.now() @@ -1054,7 +1008,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): print_log("Getting extent of", tile_id) xmin, ymin, xmax, ymax = coords(tile_id) - out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern) + out_tile = f'{tile_id}_{out_pattern}.tif' cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-ot', dt, '-overwrite', source_raster, out_tile] @@ -1062,7 +1016,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): with process.stdout: log_subprocess_output(process.stdout) - end_of_fx_summary(start, tile_id, out_pattern, no_upload) + end_of_fx_summary(start, tile_id, out_pattern) def warp_to_Hansen(in_file, out_file, xmin, ymin, xmax, ymax, dt): @@ -1093,27 +1047,27 @@ def rasterize(in_shape, out_tif, xmin, ymin, xmax, ymax, blocksizex, blocksizey, # Creates a tile of all 0s for any tile passed to it. # Uses the Hansen loss tile for information about the tile. # Based on https://gis.stackexchange.com/questions/220753/how-do-i-create-blank-geotiff-with-same-spatial-properties-as-existing-geotiff -def make_blank_tile(tile_id, pattern, folder, sensit_type): +def make_blank_tile(tile_id, pattern, folder): # Creates tile names for standard and sensitivity analyses. # Going into this, the function doesn't know whether there should be a standard tile or a sensitivity tile. # Thus, it has to be prepared for either one. - file_name = '{0}{1}_{2}.tif'.format(folder, tile_id, pattern) - file_name_sens = '{0}{1}_{2}_{3}.tif'.format(folder, tile_id, pattern, sensit_type) + file_name = f'{folder}{tile_id}_{pattern}.tif' + file_name_sens = f'{folder}{tile_id}_{pattern}_{cn.SENSIT_TYPE}.tif' # Checks if the standard file exists. If it does, a blank tile isn't created. if os.path.exists(file_name): - print_log('{} exists. 
Not creating a blank tile.'.format(os.path.join(folder, file_name))) + print_log(f'{os.path.join(folder, file_name)} exists. Not creating a blank tile.') return # Checks if the sensitivity analysis file exists. If it does, a blank tile isn't created. elif os.path.exists(file_name_sens): - print_log('{} exists. Not creating a blank tile.'.format(os.path.join(folder, file_name_sens))) + print_log(f'{os.path.join(folder, file_name_sens)} exists. Not creating a blank tile.') return # If neither a standard tile nor a sensitivity analysis tile exists, a blank tile is created. else: - print_log('{} does not exist. Creating a blank tile.'.format(file_name)) + print_log(f'{file_name} does not exist. Creating a blank tile.') with open(os.path.join(cn.docker_tmp, cn.blank_tile_txt), 'a') as f: f.write('{0}_{1}.tif'.format(tile_id, pattern)) @@ -1123,8 +1077,8 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): # Preferentially uses Hansen loss tile as the template for creating a blank plantation tile # (tile extent, resolution, pixel alignment, compression, etc.). # If the tile is already on the spot machine, it uses the downloaded tile. - if os.path.exists(os.path.join(folder, '{0}_{1}.tif'.format(cn.pattern_loss, tile_id))): - print_log("Hansen loss tile exists for {}. Using that as template for blank tile.".format(tile_id)) + if os.path.exists(os.path.join(folder, f'{cn.pattern_loss}_{tile_id}.tif')): + print_log(f'Hansen loss tile exists for {tile_id}. Using that as template for blank tile.') cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=DEFLATE', '-ot', 'Byte', '-o', '{0}{1}_{2}.tif'.format(folder, tile_id, pattern), '{0}{1}_{2}.tif'.format(folder, cn.pattern_loss, tile_id)] @@ -1135,7 +1089,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): s3_file_download('{0}{1}_{2}.tif'.format(cn.pixel_area_dir, cn.pattern_pixel_area, tile_id), os.path.join(folder, '{0}_{1}.tif'.format(tile_id, 'empty_tile_template')), 'std') - print_log("Downloaded pixel area tile for", tile_id, "to create a blank tile") + print_log(f'Downloaded pixel area tile for {tile_id} to create a blank tile') # Determines what pattern to use (standard or sensitivity) based on the first tile in the list tile_list= tile_list_spot_machine(folder, pattern) @@ -1147,7 +1101,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): '-o', '{0}/{1}_{2}.tif'.format(folder, tile_id, full_pattern), '{0}/{1}_{2}.tif'.format(folder, tile_id, 'empty_tile_template')] check_call(cmd) - print_log("Created raster of all 0s for", file_name) + print_log(f'Created raster of all 0s for {file_name}') # Creates a txt that will have blank dummy tiles listed in it for certain scripts that need those @@ -1161,88 +1115,43 @@ def create_blank_tile_txt(): def list_and_delete_blank_tiles(): blank_tiles_list = open(os.path.join(cn.docker_tmp, cn.blank_tile_txt)).read().splitlines() - print_log("Blank tile list:", blank_tiles_list) + print_log(f'Blank tile list: {blank_tiles_list}') - print_log("Deleting blank tiles...") + print_log('Deleting blank tiles...') for blank_tile in blank_tiles_list: os.remove(blank_tile) - print_log("Deleting blank tile textfile...") + print_log('Deleting blank tile textfile...') os.remove(os.path.join(cn.docker_tmp, cn.blank_tile_txt)) # Reformats the patterns for the 10x10 degree model output tiles for the aggregated output names -def name_aggregated_output(pattern, thresh, sensit_type): +def name_aggregated_output(pattern): + # print(pattern) out_pattern = re.sub('ha_', '', 
pattern) - # print out_pattern - out_pattern = re.sub('2001_{}'.format(cn.loss_years), 'per_year', out_pattern) - # print out_pattern - out_pattern = re.sub('gross_emis_year', 'gross_emis_per_year', out_pattern) - # print out_pattern - out_pattern = re.sub('_Mg_', '_Mt_', out_pattern) - # print out_pattern + # print(out_pattern) + out_pattern = re.sub(f'2001_{cn.loss_years}', '', out_pattern) + # print(out_pattern) + out_pattern = re.sub('_Mg_', '_Mt_per_year', out_pattern) + # print(out_pattern) out_pattern = re.sub('all_drivers_Mt_CO2e', 'all_drivers_Mt_CO2e_per_year', out_pattern) - # print out_pattern + # print(out_pattern) date = datetime.datetime.now() date_formatted = date.strftime("%Y%m%d") - # print thresh - # print cn.pattern_aggreg - # print sensit_type - # print date_formatted - - out_name = '{0}_tcd{1}_{2}_{3}_{4}'.format(out_pattern, thresh, cn.pattern_aggreg, sensit_type, date_formatted) - - # print out_name + out_name = f'{out_pattern}_tcd{cn.canopy_threshold}_{cn.pattern_aggreg}_{cn.SENSIT_TYPE}_{date_formatted}' + # print(out_name) return out_name -# Removes plantations that existed before 2000 from loss tile -def mask_pre_2000_plantation(pre_2000_plant, tile_to_mask, out_name, tile_id): - - if os.path.exists(pre_2000_plant): - - print_log("Pre-2000 plantation exists for {}. Cutting out pixels in those plantations...".format(tile_id)) - - # In order to mask out the pre-2000 plantation pixels from the loss raster, the pre-2000 plantations need to - # become a vrt. I couldn't get gdal_calc to work while keeping pre-2000 plantations as a raster; it wasn't - # recognizing the 0s (nodata). - # Based on https://gis.stackexchange.com/questions/238397/how-to-indicate-nodata-into-gdal-calc-formula - # Only the pre-2000 plantation raster needed to be converted to a vrt; the loss raster did not. - cmd = ['gdal_translate', '-of', 'VRT', pre_2000_plant, - '{0}_{1}.vrt'.format(tile_id, cn.pattern_plant_pre_2000), '-a_nodata', 'none'] - check_call(cmd) - - # Removes the pre-2000 plantation pixels from the loss tile - pre_2000_vrt = '{0}_{1}.vrt'.format(tile_id, cn.pattern_plant_pre_2000) - calc = '--calc=A*(B==0)' - loss_outfilearg = '--outfile={}'.format(out_name) - cmd = ['gdal_calc.py', '-A', tile_to_mask, '-B', pre_2000_vrt, - calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] - check_call(cmd) - - # Basically, does nothing if there is no pre-2000 plantation and the output name is the same as the - # input name - elif tile_to_mask == out_name: - return - - else: - print_log("No pre-2000 plantation exists for {}. Tile done.".format(tile_id)) - # print tile_to_mask - # print out_name - copyfile(tile_to_mask, out_name) - - print_log(" Pre-2000 plantations for {} complete".format(tile_id)) - - # Checks whether the provided sensitivity analysis type is valid def check_sensit_type(sensit_type): # Checks the validity of the two arguments. If either one is invalid, the script ends. if (sensit_type not in cn.sensitivity_list): - exception_log('Invalid model type. Please provide a model type from {}.'.format(cn.sensitivity_list)) + exception_log(f'Invalid model type. 
Please provide a model type from {cn.sensitivity_list}.') else: pass @@ -1250,38 +1159,48 @@ def check_sensit_type(sensit_type): # Changes the name of the input or output directory according to the sensitivity analysis def alter_dirs(sensit_type, raw_dir_list): - print_log("Raw output directory list:", raw_dir_list) + print_log(f'Raw output directory list: {raw_dir_list}') processed_dir_list = [d.replace('standard', sensit_type) for d in raw_dir_list] - print_log("Processed output directory list:", processed_dir_list, "\n") + print_log(f'Processed output directory list: {processed_dir_list}', "\n") return processed_dir_list # Alters the file patterns in a list according to the sensitivity analysis def alter_patterns(sensit_type, raw_pattern_list): - print_log("Raw output pattern list:", raw_pattern_list) + print_log(f'Raw output pattern list: {raw_pattern_list}') processed_pattern_list = [(d + '_' + sensit_type) for d in raw_pattern_list] - print_log("Processed output pattern list:", processed_pattern_list, "\n") + print_log(f'Processed output pattern list: {processed_pattern_list}', "\n") return processed_pattern_list # Creates the correct input tile name for processing based on the sensitivity analysis being done def sensit_tile_rename(sensit_type, tile_id, raw_pattern): - # print '{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type) - # Uses whatever name of the tile is found on the spot machine - if os.path.exists('{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type)): - processed_name = '{0}_{1}_{2}.tif'.format(tile_id, raw_pattern, sensit_type) + if os.path.exists(f'{tile_id}_{raw_pattern}_{sensit_type}.tif'): + processed_name = f'{tile_id}_{raw_pattern}_{sensit_type}.tif' else: - processed_name = '{0}_{1}.tif'.format(tile_id, raw_pattern) + processed_name = f'{tile_id}_{raw_pattern}.tif' return processed_name +# Creates the correct input biomass tile name for processing based on the sensitivity analysis being done. +# Because there are actually different input biomass tiles, this doesn't fit well within sensit_tile_rename(). +def sensit_tile_rename_biomass(sensit_type, tile_id): + + if cn.SENSIT_TYPE == 'biomass_swap': + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_JPL_unmasked_processed}.tif' + print_log(f'Using JPL biomass tile {tile_id} for {sensit_type} sensitivity analysis') + else: + natrl_forest_biomass_2000 = f'{tile_id}_{cn.pattern_WHRC_biomass_2000_unmasked}.tif' + print_log(f'Using WHRC biomass tile {tile_id} for {sensit_type} model run') + + return natrl_forest_biomass_2000 # Determines what stages should actually be run def analysis_stages(stage_list, stage_input, run_through, sensit_type, @@ -1323,7 +1242,7 @@ def analysis_stages(stage_list, stage_input, run_through, sensit_type, def tile_id_list_check(tile_id_list): if tile_id_list == 'all': - print_log("All tiles will be run through model. 
Actual list of tiles will be listed for each model stage as it begins...') return tile_id_list # Checks tile id list input validity against the pixel area tiles else: @@ -1340,28 +1259,30 @@ def tile_id_list_check(tile_id_list): for tile_id in tile_id_list: if tile_id not in possible_tile_list: - exception_log('Tile_id {} not valid'.format(tile_id)) + exception_log(f'Tile_id {tile_id} not valid') else: - print_log("{} tiles have been supplied for running through the model".format(str(len(tile_id_list))), "\n") + print_log(f'{str(len(tile_id_list))} tiles have been supplied for running through the model', "\n") return tile_id_list # Replaces the date specified in constants_and_names with the date provided by the model run-through def replace_output_dir_date(output_dir_list, run_date): - print_log("Changing output directory date based on date provided with model run-through") + print_log('Changing output directory date based on date provided with model run-through') output_dir_list = [output_dir.replace(output_dir[-9:-1], run_date) for output_dir in output_dir_list] print_log(output_dir_list, "\n") return output_dir_list # Adds various metadata tags to the raster -def add_rasterio_tags(output_dst, sensit_type): +def add_universal_metadata_rasterio(output_dst): # based on https://rasterio.readthedocs.io/en/latest/topics/tags.html - if sensit_type == 'std': + if cn.SENSIT_TYPE == 'std': sensit_type = 'standard model' + else: + sensit_type = cn.SENSIT_TYPE output_dst.update_tags( model_version=cn.version) @@ -1374,70 +1295,62 @@ def add_rasterio_tags(output_dst, sensit_type): output_dst.update_tags( citation='Harris et al. 2021 Nature Climate Change https://www.nature.com/articles/s41558-020-00976-6') output_dst.update_tags( - model_year_range='2001 through 20{}'.format(cn.loss_years) + model_year_range=f'2001 through 20{cn.loss_years}' ) return output_dst -def add_universal_metadata_tags(output_raster, sensit_type): +def add_universal_metadata_gdal(output_raster): print_log("Adding universal metadata tags to", output_raster) - cmd = ['gdal_edit.py', '-mo', 'model_version={}'.format(cn.version), - '-mo', 'date_created={}'.format(date_today), - '-mo', 'model_type={}'.format(sensit_type), + cmd = ['gdal_edit.py', + '-mo', f'model_version={cn.version}', + '-mo', f'date_created={date_today}', + '-mo', f'model_type={cn.SENSIT_TYPE}', '-mo', 'originator=Global Forest Watch at the World Resources Institute', - '-mo', 'model_year_range=2001 through 20{}'.format(cn.loss_years), + '-mo', f'model_year_range=2001 through 20{cn.loss_years}', output_raster] log_subprocess_output_full(cmd) -# Adds metadata tags to raster. -# Certain tags are included for all rasters, while other tags can be customized for each input set. 
-def add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list): +# Adds metadata tags to the output rasters +def add_emissions_metadata(tile_id, output_pattern): - output_raster = '{0}_{1}.tif'.format(tile_id, output_pattern) - - print_log("Adding metadata tags to", output_raster) - - # Universal metadata tags - cmd = ['gdal_edit.py', '-mo', 'model_version={}'.format(cn.version), - '-mo', 'date_created={}'.format(date_today), - '-mo', 'model_type={}'.format(sensit_type), - '-mo', 'originator=Global Forest Watch at the World Resources Institute', - '-mo', 'model_year_range=2001 through 20{}'.format(cn.loss_years)] - - # Metadata tags specifically for this dataset - for metadata in metadata_list: - cmd += ['-mo', metadata] - - cmd += [output_raster] + # Adds metadata tags to output rasters + add_universal_metadata_gdal(f'{tile_id}_{output_pattern}.tif') + cmd = ['gdal_edit.py', '-mo', + f'units=Mg CO2e/ha over model duration (2001-20{cn.loss_years})', + '-mo', 'source=many data sources', + '-mo', 'extent=Tree cover loss pixels within model extent (and tree cover loss driver, if applicable)', + f'{tile_id}_{output_pattern}.tif'] log_subprocess_output_full(cmd) + # Converts 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 160x160 pixels, # which is the resolution of the output tiles. This allows the 30x30 m pixels in each window to be summed # into 0.04x0.04 degree rasters. -def rewindow(tile_id, download_pattern_name, no_upload): +def rewindow(tile_id, download_pattern_name): # start time start = datetime.datetime.now() # These tiles have the tile_id after the pattern - if download_pattern_name in [cn.pattern_pixel_area, cn.pattern_tcd, cn.pattern_gain, cn.pattern_loss]: - in_tile = "{0}_{1}.tif".format(download_pattern_name, tile_id) - out_tile = "{0}_rewindow_{1}.tif".format(download_pattern_name, tile_id) + if download_pattern_name in [cn.pattern_pixel_area, cn.pattern_tcd, cn.pattern_loss]: + in_tile = f'{download_pattern_name}_{tile_id}.tif' + out_tile = f'{download_pattern_name}_rewindow_{tile_id}.tif' else: - in_tile = "{0}_{1}.tif".format(tile_id, download_pattern_name) - out_tile = "{0}_{1}_rewindow.tif".format(tile_id, download_pattern_name) + in_tile = f'{tile_id}_{download_pattern_name}.tif' + out_tile = f'{tile_id}_{download_pattern_name}_rewindow.tif' check_memory() # Only rewindows if the tile exists if os.path.exists(in_tile): - print_log("{0} exists. Rewindowing to {1} at {2}x{3} pixel windows...".format(in_tile, out_tile, cn.agg_pixel_window, cn.agg_pixel_window)) + print_log(f'{in_tile} exists. Rewindowing to {out_tile} with {cn.agg_pixel_window}x{cn.agg_pixel_window} pixel windows...') # Just using gdalwarp inflated the output rasters about 10x, even with COMPRESS=LZW. # Solution was to use gdal_translate instead, although, for unclear reasons, this still inflates the size @@ -1449,7 +1362,10 @@ def rewindow(tile_id, download_pattern_name, no_upload): else: - print_log("{} does not exist. Not rewindowing".format(in_tile)) + print_log(f'{in_tile} does not exist. Not rewindowing') # Prints information about the tile that was just processed - end_of_fx_summary(start, tile_id, "{}_rewindow".format(download_pattern_name), no_upload) + end_of_fx_summary(start, tile_id, "{}_rewindow".format(download_pattern_name)) + + +
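To illustrate what the rewindowing above enables, here is a minimal sketch (not the model code itself) of summing each 160x160 pixel window of a rewindowed per-hectare tile into one 0.04x0.04 degree cell. The window size is assumed to match cn.agg_pixel_window, and the input file names are hypothetical examples that follow the naming patterns shown in this diff.

# Illustrative sketch only: aggregate a rewindowed per-hectare tile to 0.04x0.04 degree cells.
# File names are hypothetical; window size of 160 is assumed to equal cn.agg_pixel_window.
import numpy as np
import rasterio

WINDOW = 160  # assumed 160x160 pixel windows created by the rewindow step

with rasterio.open('00N_000E_gross_emis_rewindow.tif') as src, \
     rasterio.open('pixel_area_rewindow_00N_000E.tif') as area:

    # A 40000x40000 pixel tile yields 250x250 aggregated cells (40000 / 160 = 250)
    n_rows = src.height // WINDOW
    n_cols = src.width // WINDOW
    aggregated = np.zeros((n_rows, n_cols), dtype=np.float32)

    # Iterate over the internal 160x160 blocks of the rewindowed rasters
    for idx, window in src.block_windows(1):
        per_ha = src.read(1, window=window, masked=True).filled(0)
        area_m2 = area.read(1, window=window, masked=True).filled(0)

        # Convert value/ha to value/pixel using the pixel area (m2 -> ha), then sum
        # the whole window into a single coarse pixel
        per_pixel = per_ha * area_m2 / 10000.0
        row, col = window.row_off // WINDOW, window.col_off // WINDOW
        aggregated[row, col] = per_pixel.sum()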