From 6c679e5edaf7302ec00b78d87dfb56a6c12a2b55 Mon Sep 17 00:00:00 2001 From: CJ Yetman - RMI Date: Wed, 14 Aug 2024 01:25:56 +0200 Subject: [PATCH 1/2] import and adapt the `run_pacta` script --- plots.R | 361 +++++++++++++++++++++++++++++++++++++++++++++++++ run_pacta.R | 382 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 743 insertions(+) create mode 100644 run_pacta.R diff --git a/plots.R b/plots.R index 1a2eabea..62e5f3a1 100644 --- a/plots.R +++ b/plots.R @@ -119,3 +119,364 @@ plot_match_success_rate <- function(data, } plot } + + +generate_individual_outputs <- function(data, + matched_prioritized, + output_directory, + target_type = c("tms", "sda"), + group_id, + scenario_source, + scenario, + region = "global", + sector, + start_year, + time_horizon) { + + # match input values + target_type <- match.arg(target_type) + + target_scenario <- paste0("target_", scenario) + + # validate input values + validate_input_args_generate_individual_outputs( + output_directory = output_directory, + group_id = group_id, + scenario_source = scenario_source, + target_scenario = target_scenario, + region = region, + sector = sector, + start_year = start_year, + time_horizon = time_horizon + ) + + # validate input data + validate_input_data_generate_individual_outputs( + data = data, + matched_prioritized = matched_prioritized, + target_type = target_type + ) + + # create sub directory for the selected institute + dir.create(file.path(output_directory, group_id), showWarnings = FALSE) + + data <- data %>% + dplyr::filter( + group_id == .env$group_id, + scenario_source == .env$scenario_source, + region == .env$region, + sector %in% .env$sector + ) + + matched_prioritized <- matched_prioritized %>% + dplyr::filter( + group_id == .env$group_id, + sector %in% .env$sector + ) + + if (target_type == "tms") { + # plot tech mix for given sector + data_techmix <- data %>% + dplyr::filter( + .data$metric %in% c("projected", "corporate_economy", .env$target_scenario), + dplyr::between(.data$year, .env$start_year, .env$start_year + .env$time_horizon) + ) %>% + dplyr::mutate( + label = case_when( + .data$metric == "projected" ~ "Portfolio", + .data$metric == "corporate_economy" ~ "Corporate Economy", + .data$metric == .env$target_scenario ~ glue::glue("{r2dii.plot::to_title(toupper(.env$scenario))} Scenario") + ) + ) %>% + r2dii.plot::prep_techmix( + span_5yr = TRUE + ) + + plot_techmix <- data_techmix %>% + r2dii.plot::plot_techmix() + + # colors in tech mix plot set to make technologies more distinguishable + if (sector == "automotive") { + plot_techmix <- plot_techmix + + ggplot2::scale_fill_manual( + values = c("#4a5e54", "#d0d7e1", "#1b324f", "#00c082"), + labels = c("ICE", "Hybrid", "Fuelcell","Electric") + ) + } else if (sector == "power") { + plot_techmix <- plot_techmix + + ggplot2::scale_fill_manual( + values = c("#4a5e54", "#d0d7e1", "#a63d57", "#f2e06e", "#1b324f", "#00c082"), + labels = paste(c("Coal", "Oil", "Gas", "Nuclear", "Hydro", "Renewables"), "Cap.") + ) + } + + # export tech mix + data_techmix %>% + readr::write_csv( + file = file.path( + output_directory, + group_id, + glue::glue("data_tech_mix_{sector}_{group_id}.csv") + ), + na = "" + ) + + ggplot2::ggsave( + filename = glue::glue("plot_tech_mix_{sector}_{group_id}.png"), + plot = plot_techmix, + device = "png", + path = file.path(output_directory, group_id) + ) + + # plot trajectory charts for all available techs in given sector + technologies_in_sector <- r2dii.data::increasing_or_decreasing %>% + dplyr::filter(.data$sector == .env$sector) %>% + dplyr::pull(.data$technology) + + technologies_to_plot <- data %>% + dplyr::filter( + .data$metric == .env$target_scenario, + .data$technology %in% .env$technologies_in_sector + ) %>% + dplyr::distinct(.data$technology) %>% + dplyr::arrange(.data$technology) %>% + dplyr::pull() + + for (i in 1:length(technologies_to_plot)) { + data_trajectory <- data %>% + dplyr::filter( + .data$technology == .env$technologies_to_plot[i], + dplyr::between(.data$year, .env$start_year, .env$start_year + .env$time_horizon) + ) %>% + r2dii.plot::prep_trajectory( + convert_label = r2dii.plot::recode_metric_trajectory, + span_5yr = TRUE, + value_col = "percentage_of_initial_production_by_scope" + ) + + if (sector == "power") { + y_lab_trajectory <- "Normalized Capacity" + } else { + y_lab_trajectory <- "Normalized Production" + } + + plot_trajectory <- data_trajectory %>% + r2dii.plot::plot_trajectory( + center_y = TRUE, + perc_y_scale = TRUE + ) + + ggplot2::xlab("Year") + + ggplot2::ylab(y_lab_trajectory) + + # export trajectory chart + data_trajectory %>% + readr::write_csv( + file = file.path( + output_directory, + group_id, + glue::glue("data_trajectory_{sector}_{technologies_to_plot[i]}_{group_id}.csv") + ), + na = "" + ) + + ggplot2::ggsave( + filename = glue::glue("plot_trajectory_{sector}_{technologies_to_plot[i]}_{group_id}.png"), + plot = plot_trajectory, + device = "png", + path = file.path(output_directory, group_id) + ) + } + } else { + # plot convergence chart for given sector + adjusted_scenario <- paste0("adjusted_scenario_", scenario) + + data_emission_intensity <- data %>% + dplyr::filter( + dplyr::between( + .data$year, + .env$start_year, + .env$start_year + .env$time_horizon) + ) %>% + dplyr::filter( + .data$emission_factor_metric %in% c( + "projected", + "corporate_economy", + .env$target_scenario, + .env$adjusted_scenario + ) + ) %>% + dplyr::mutate( + emission_factor_metric = factor( + .data$emission_factor_metric, + levels = c( + "projected", + "corporate_economy", + .env$target_scenario, + .env$adjusted_scenario + ) + ) + ) %>% + r2dii.plot::prep_emission_intensity( + span_5yr = TRUE + ) + + colours_scale <- c( + "dark_blue", + "green", + "orange", + "grey" + ) + + if (sector == "cement") { + y_lab_emissions_intensity <- "Tons of CO2 per Ton of Cement Produced" + } else if (sector == "steel") { + y_lab_emissions_intensity <- "Tons of CO2 per Ton of Steel Produced" + } else if (sector == "aviation") { + y_lab_emissions_intensity <- "Tons of CO2 per Passenger Kilometer" + } else { + y_lab_emissions_intensity <- "Emission Factor Value" + } + + plot_emission_intensity <- data_emission_intensity %>% + r2dii.plot::plot_emission_intensity() + + r2dii.plot::scale_colour_r2dii( + colour_labels = colours_scale, + labels = c( + "Portfolio", + "Corporate Economy", + glue::glue("Target {r2dii.plot::to_title(toupper(scenario))}"), + glue::glue("Adjusted Scenario {r2dii.plot::to_title(toupper(scenario))}") + ) + ) + + ggplot2::xlab("Year") + + ggplot2::ylab(y_lab_emissions_intensity) + + # export convergence chart + data_emission_intensity %>% + readr::write_csv( + file = file.path( + output_directory, + group_id, + glue::glue("data_emission_intensity_{sector}_{group_id}.csv") + ), + na = "" + ) + + ggplot2::ggsave( + filename = glue::glue("plot_emission_intensity_{sector}_{group_id}.png"), + plot = plot_emission_intensity, + device = "png", + path = file.path(output_directory, group_id) + ) + } + companies_included <- matched_prioritized %>% + dplyr::select( + "group_id", "name_abcd", "sector_abcd", "loan_size_outstanding", + "loan_size_outstanding_currency", "loan_size_credit_limit", + "loan_size_credit_limit_currency" + ) + + companies_included %>% + readr::write_csv( + file = file.path( + output_directory, + group_id, + glue::glue("companies_included_{sector}_{group_id}.csv") + ), + na = "" + ) + +} + + +validate_input_args_generate_individual_outputs <- function(output_directory, + group_id, + scenario_source, + target_scenario, + region, + sector, + start_year, + time_horizon) { + if (!length(output_directory) == 1) { + stop("Argument output_directory must be of length 1. Please check your input.") + } + if (!inherits(output_directory, "character")) { + stop("Argument output_directory must be of class character. Please check your input.") + } + if (!length(group_id) == 1) { + stop("Argument group_id must be of length 1. Please check your input.") + } + if (!length(scenario_source) == 1) { + stop("Argument scenario_source must be of length 1. Please check your input.") + } + if (!inherits(scenario_source, "character")) { + stop("Argument scenario_source must be of class character. Please check your input.") + } + if (!length(target_scenario) == 1) { + stop("Argument target_scenario must be of length 1. Please check your input.") + } + if (!inherits(target_scenario, "character")) { + stop("Argument target_scenario must be of class character. Please check your input.") + } + if (!length(region) == 1) { + stop("Argument region must be of length 1. Please check your input.") + } + if (!inherits(region, "character")) { + stop("Argument region must be of class character. Please check your input.") + } + if (!length(sector) == 1) { + stop("Argument sector must be of length 1. Please check your input.") + } + if (!inherits(sector, "character")) { + stop("Argument sector must be of class character. Please check your input.") + } + if (!length(start_year) == 1) { + stop("Argument start_year must be of length 1. Please check your input.") + } + if (!inherits(start_year, "integer")) { + stop("Argument start_year must be of class integer. Please check your input.") + } + if (!length(time_horizon) == 1) { + stop("Argument time_horizon must be of length 1. Please check your input.") + } + if (!inherits(time_horizon, "integer")) { + stop("Argument time_horizon must be of class integer. Please check your input.") + } + + invisible() +} + + +validate_input_data_generate_individual_outputs <- function(data, + matched_prioritized, + target_type) { + if (target_type == "sda") { + validate_data_has_expected_cols( + data = data, + expected_columns = c( + "sector", "year", "region", "scenario_source", "emission_factor_metric", + "emission_factor_value", "group_id" + ) + ) + } else if (target_type == "tms") { + validate_data_has_expected_cols( + data = data, + expected_columns = c( + "sector", "technology", "year", "region", "scenario_source", "metric", + "production", "technology_share", "scope", + "percentage_of_initial_production_by_scope", "group_id" + ) + ) + } + + validate_data_has_expected_cols( + data = matched_prioritized, + expected_columns = c( + "group_id", "name_abcd", "sector", "sector_abcd", "loan_size_outstanding", + "loan_size_outstanding_currency", "loan_size_credit_limit", + "loan_size_credit_limit_currency" + ) + ) + + invisible() +} diff --git a/run_pacta.R b/run_pacta.R new file mode 100644 index 00000000..e326f140 --- /dev/null +++ b/run_pacta.R @@ -0,0 +1,382 @@ +# load packages---- +# library(dotenv) +library(dplyr) +library(pacta.multi.loanbook.analysis) +library(r2dii.analysis) +library(r2dii.data) +library(r2dii.plot) + + +# source helpers---- +source("expected_columns.R") +source("plots.R") + +# load config---- +config <- config::get() + +input_path_scenario <- config$directories$dir_scenario +input_dir_abcd <- config$directories$dir_abcd +input_path_matched <- config$directories$dir_matched + +input_path_scenario_tms <- file.path(input_path_scenario, config$file_names$filename_scenario_tms) +input_path_scenario_sda <- file.path(input_path_scenario, config$file_names$filename_scenario_sda) + +input_path_abcd <- file.path(input_dir_abcd, "abcd_final.csv") + +output_path <- config$directories$dir_output +output_path_standard <- file.path(output_path, "standard") + +scenario_source_input <- config$project_parameters$scenario_source +scenario_select <- config$project_parameters$scenario_select +region_select <- config$project_parameters$region_select +start_year <- config$project_parameters$start_year +time_frame_select <- config$project_parameters$time_frame +apply_sector_split <- config$sector_split$apply_sector_split +if (is.null(apply_sector_split)) { apply_sector_split <- FALSE } +sector_split_type_select <- config$sector_split$sector_split_type +remove_inactive_companies <- config$prepare_abcd$remove_inactive_companies +if (is.null(remove_inactive_companies)) { remove_inactive_companies <- FALSE } + +# if a sector split is applied, write results into a directory that states the type +if (apply_sector_split) { + output_path_standard <- file.path(output_path, sector_split_type_select, "standard") +} + +dir.create(output_path_standard, recursive = TRUE) + +# TODO: add check if all files exist, resort to test files if not + +# load input data---- +region_isos_select <- r2dii.data::region_isos %>% + dplyr::filter( + .data$source == .env$scenario_source_input, + .data$region %in% .env$region_select + ) + +scenario_input_tms <- readr::read_csv( + input_path_scenario_tms, + col_types = col_types_scenario_tms, + col_select = dplyr::all_of(col_select_scenario_tms) +) + +scenario_input_sda <- readr::read_csv( + input_path_scenario_sda, + col_types = col_types_scenario_sda, + col_select = dplyr::all_of(col_select_scenario_sda) +) + +abcd <- readr::read_csv( + input_path_abcd, + col_select = dplyr::all_of(cols_abcd), + col_types = col_types_abcd_final +) + +# read matched and prioritized loan book---- +list_matched_prio <- list.files(input_path_matched)[grepl("matched_prio_", list.files(input_path_matched))] + +matched_prioritized <- NULL + +# combine all matched loan books into one object to loop over +for (i in list_matched_prio) { + matched_prioritized_i <- readr::read_csv( + file.path(input_path_matched, i), + col_types = col_types_matched_prioritized, + col_select = dplyr::all_of(col_select_matched_prioritized) + ) + + matched_prioritized <- matched_prioritized %>% + dplyr::bind_rows(matched_prioritized_i) +} + +# meta loan book---- +# aggregate all individual loan books into one meta loan book and add that to +# the full list of loan books +matched_prioritized_meta <- matched_prioritized %>% + dplyr::mutate( + id_loan = paste0(.data$id_loan, "_", .data$group_id), + group_id = "meta_loanbook" + ) + +matched_prioritized <- matched_prioritized %>% + dplyr::bind_rows(matched_prioritized_meta) + +# generate all P4B outputs---- +unique_loanbooks_matched <- unique(matched_prioritized$group_id) + +## generate SDA outputs---- +results_sda_total <- NULL + +# generate SDA results for each individual loan book, including the meta loan book +for (i in unique_loanbooks_matched) { + matched_i <- matched_prioritized %>% + dplyr::filter(.data$group_id == i) %>% + dplyr::select(-"group_id") + + results_sda_i <- matched_i %>% + r2dii.analysis::target_sda( + abcd = abcd, + co2_intensity_scenario = scenario_input_sda, + region_isos = region_isos_select + ) %>% + dplyr::mutate(group_id = .env$i) + + results_sda_total <- results_sda_total %>% + dplyr::bind_rows(results_sda_i) +} + +# write SDA results to csv +results_sda_total %>% + readr::write_csv( + file.path(output_path_standard, "sda_results_all_groups.csv"), + na = "" + ) + + +## generate TMS outputs---- + +results_tms_total <- NULL + +# generate TMS results for each individual loan book, including the meta loan book +for (i in unique_loanbooks_matched) { + matched_i <- matched_prioritized %>% + dplyr::filter(.data$group_id == i) %>% + dplyr::select(-"group_id") + + results_tms_i <- matched_i %>% + r2dii.analysis::target_market_share( + abcd = abcd, + scenario = scenario_input_tms, + region_isos = region_isos_select + ) %>% + dplyr::mutate(group_id = .env$i) + + results_tms_total <- results_tms_total %>% + dplyr::bind_rows(results_tms_i) +} + +# write TMS results to csv +results_tms_total %>% + readr::write_csv( + file.path(output_path_standard, "tms_results_all_groups.csv"), + na = "" + ) + +# generate P4B plots---- + +## retrieve set of unique groups to loop over---- +unique_groups_tms <- unique(results_tms_total$group_id) +unique_groups_sda <- unique(results_sda_total$group_id) + +## run automatic result generation ---------- + +### automotive---- +sector_select <- "automotive" +for (tms_i in unique_groups_tms) { + available_rows <- results_tms_total %>% + dplyr::filter( + group_id == tms_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_tms_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "tms", + group_id = tms_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} +### coal---- +sector_select <- "coal" +for (tms_i in unique_groups_tms) { + available_rows <- results_tms_total %>% + dplyr::filter( + group_id == tms_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_tms_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "tms", + group_id = tms_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} +### oil and gas---- +sector_select <- "oil and gas" +for (tms_i in unique_groups_tms) { + available_rows <- results_tms_total %>% + dplyr::filter( + group_id == tms_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_tms_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "tms", + group_id = tms_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} +### power---- +sector_select <- "power" +for (tms_i in unique_groups_tms) { + available_rows <- results_tms_total %>% + dplyr::filter( + group_id == tms_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_tms_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "tms", + group_id = tms_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} + +### aviation---- +sector_select <- "aviation" +for (sda_i in unique_groups_sda) { + available_rows <- results_sda_total %>% + dplyr::filter( + group_id == sda_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$emission_factor_metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_sda_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "sda", + group_id = sda_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} +### cement---- +sector_select <- "cement" +for (sda_i in unique_groups_sda) { + available_rows <- results_sda_total %>% + dplyr::filter( + group_id == sda_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$emission_factor_metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_sda_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "sda", + group_id = sda_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} +### steel---- +sector_select <- "steel" +for (sda_i in unique_groups_sda) { + available_rows <- results_sda_total %>% + dplyr::filter( + group_id == sda_i, + scenario_source == scenario_source_input, + grepl(scenario_select, .data$emission_factor_metric), + region == region_select, + sector == sector_select + ) %>% + nrow() + if (available_rows > 0) { + generate_individual_outputs( + data = results_sda_total, + matched_prioritized = matched_prioritized, + output_directory = output_path_standard, + target_type = "sda", + group_id = sda_i, + scenario_source = scenario_source_input, + scenario = scenario_select, + region = region_select, + sector = sector_select, + start_year = start_year, + time_horizon = time_frame_select + ) + } else { + next() + } +} From 5149ad75721224a11e3deec0bd9de396b3671982 Mon Sep 17 00:00:00 2001 From: CJ Yetman Date: Wed, 14 Aug 2024 11:27:47 +0200 Subject: [PATCH 2/2] remove dotenv load --- run_pacta.R | 1 - 1 file changed, 1 deletion(-) diff --git a/run_pacta.R b/run_pacta.R index e326f140..042a2c32 100644 --- a/run_pacta.R +++ b/run_pacta.R @@ -1,5 +1,4 @@ # load packages---- -# library(dotenv) library(dplyr) library(pacta.multi.loanbook.analysis) library(r2dii.analysis)