From ecda8501aa841943acf1d5a90a7d13aa97a686f7 Mon Sep 17 00:00:00 2001 From: Jackson Hoffart Date: Mon, 25 Mar 2024 18:01:36 +0100 Subject: [PATCH] feat: `target_market_share` gains extended time-horizon for `target_*` outputs, to entire time-horizon of input `scenario` (#481) --- R/join_abcd_scenario.R | 76 ++++++++++++++++--- R/summarize_weighted_production.R | 30 ++++++-- R/target_market_share.R | 25 +++--- tests/testthat/_snaps/join_abcd_scenario.md | 6 +- tests/testthat/test-join_abcd_scenario.R | 63 ++++++++++++++- .../test-summarize_weighted_production.R | 2 - tests/testthat/test-target_market_share.R | 14 ++++ vignettes/production-percent-change.Rmd | 3 +- 8 files changed, 182 insertions(+), 37 deletions(-) diff --git a/R/join_abcd_scenario.R b/R/join_abcd_scenario.R index 6c0d6fc2..6c6d8b39 100644 --- a/R/join_abcd_scenario.R +++ b/R/join_abcd_scenario.R @@ -53,10 +53,63 @@ join_abcd_scenario <- function(data, abcd <- add_green_technologies_to_abcd(abcd, scenario) } + abcd <- abcd %>% + arrange(.data[["year"]]) %>% + mutate( + .start_year_abcd = first(.data[["year"]]), + .by = c("name_company", "sector", "plant_location") + ) + + scenario <- scenario %>% + arrange(.data[["year"]]) %>% + mutate( + .start_year_scenario = first(.data[["year"]]), + .by = c("sector") + ) + + abcd_wide <- tidyr::pivot_wider( + abcd, + names_from = "year", + names_sep = "~", + values_from = c("production", "emission_factor") + ) + + scenario_wide <- tidyr::pivot_wider( + scenario, + names_from = "year", + names_sep = "~", + values_from = c("tmsr", "smsp") + ) + + data <- data %>% + left_join(abcd_wide, by = abcd_columns(), relationship = "many-to-many") %>% + left_join(scenario_wide, by = scenario_columns(), relationship = "many-to-many") %>% + warn_if_has_zero_rows("Joining `scenario` outputs 0 rows.") + + data <- tidyr::pivot_longer( + data, + cols = tidyr::contains("~"), + names_sep = "~", + names_to = c("metric", "year") + ) + + data <- dplyr::mutate(data, year = as.integer(.data[["year"]])) + + data <- tidyr::pivot_wider( + data, + names_from = "metric", + values_from = "value" + ) + + data <- data %>% + dplyr::filter(.data[["year"]] >= .data[[".start_year_abcd"]]) %>% + dplyr::filter(.data[["year"]] >= .data[[".start_year_scenario"]]) %>% + dplyr::mutate( + .start_year_abcd = NULL, + .start_year_scenario = NULL + ) + out <- data %>% - left_join(abcd, by = abcd_columns(), relationship = "many-to-many") %>% - inner_join(scenario, by = scenario_columns(), relationship = "many-to-many") %>% - warn_if_has_zero_rows("Joining `scenario` outputs 0 rows.") %>% mutate(plant_location = tolower(.data$plant_location)) %>% inner_join( region_isos, @@ -76,17 +129,21 @@ check_portfolio_abcd_scenario <- function(valid_matches, abcd, scenario) { walk_(names(abcd_columns()), ~ check_no_value_is_missing(valid_matches, .x)) check_crucial_names( - abcd, c("name_company", "plant_location", unname(scenario_columns())) + abcd, + c("name_company", "plant_location", "year", unname(scenario_columns())) ) walk_( - c("name_company", unname(scenario_columns())), + c("name_company", "year", unname(scenario_columns())), ~ check_no_value_is_missing(abcd, .x) ) - check_crucial_names(scenario, c(scenario_columns(), "scenario_source", "region")) + check_crucial_names( + scenario, + c(scenario_columns(), "scenario_source", "region", "year") + ) walk_( - c(scenario_columns(), "scenario_source", "region"), + c(scenario_columns(), "scenario_source", "region", "year"), ~ check_no_value_is_missing(scenario, .x) ) @@ -105,7 +162,7 @@ add_green_technologies_to_abcd <- function(data, scenario) { increasing_techs_not_in_abcd <- dplyr::filter( increasing_techs_in_scenario, - !(technology %in% unique(data$technology)) + !(.data[["technology"]] %in% unique(data$technology)) ) green_rows_to_add <- data %>% @@ -137,7 +194,6 @@ abcd_columns <- function() { scenario_columns <- function() { c( sector_abcd = "sector", - technology = "technology", - year = "year" + technology = "technology" ) } diff --git a/R/summarize_weighted_production.R b/R/summarize_weighted_production.R index 04a17b89..fab668f2 100644 --- a/R/summarize_weighted_production.R +++ b/R/summarize_weighted_production.R @@ -58,13 +58,13 @@ summarize_weighted_production_ <- function(data, ..., use_credit_limit = FALSE, old_groups <- dplyr::groups(data) - crucial <- c("production", "sector_abcd", "year", "technology") + crucial <- c("sector_abcd", "year", "technology") if (with_targets) { crucial <- c(crucial, "production_target") } - check_crucial_names(data, crucial) + check_crucial_names(data, c("production", crucial)) walk_(crucial, ~ check_no_value_is_missing(data, .x)) data <- data %>% @@ -187,10 +187,24 @@ summarize_unweighted_emission_factor <- function(data, ...) { } calculate_weighted_loan_metric <- function(data, metric) { - crucial <- c(metric, "loan_weight") - check_crucial_names(data, crucial) - walk_(crucial, ~ check_no_value_is_missing(data, .x)) + check_crucial_names(data, c(metric, "loan_weight")) + + allowed_missing_vals <- c( + "production", + "production_target", + "technology_share", + "technology_share_target", + "percent_change" + ) + + if (metric %in% allowed_missing_vals) { + no_missing_vals <- "loan_weight" + } else { + no_missing_vals <- c("loan_weight", metric) + } + + walk_(no_missing_vals, ~ check_no_value_is_missing(data, .x)) data %>% mutate( @@ -234,9 +248,9 @@ add_loan_weight <- function(data, use_credit_limit) { } add_percent_change <- function(data) { - crucial <- c("production", "sector_abcd", "year", "technology", "scenario") + crucial <- c("sector_abcd", "year", "technology", "scenario") - check_crucial_names(data, crucial) + check_crucial_names(data, c("production", crucial)) walk_(crucial, ~ check_no_value_is_missing(data, .x)) check_zero_initial_production(data) @@ -303,7 +317,7 @@ add_technology_share_target <- function(data) { check_zero_initial_production <- function(data) { companies_with_zero_initial_production <- data %>% - group_by(.data$technology, .data$name_abcd, .data$year) %>% + group_by(.data$technology, .data$name_abcd) %>% arrange(.data$year) %>% filter(.data$year == first(.data$year)) %>% summarize(production_at_start_year = sum(.data$production)) %>% diff --git a/R/target_market_share.R b/R/target_market_share.R index 7645da91..33b42dab 100644 --- a/R/target_market_share.R +++ b/R/target_market_share.R @@ -85,8 +85,6 @@ target_market_share <- function(data, is.logical(weight_production) ) - abcd <- fill_and_warn_na(abcd, "production") - region_isos <- change_to_lowercase_and_warn(region_isos, "isos") warn_if_by_company_and_weight_production(by_company, weight_production) @@ -95,8 +93,23 @@ target_market_share <- function(data, check_input_for_crucial_columns(data, abcd, scenario) + abcd <- fill_and_warn_na(abcd, "production") + abcd <- dplyr::summarize( + abcd, + production = sum(.data[["production"]]), + .by = -"production" + ) + data <- aggregate_by_name_abcd(data) + if ("production" %in% colnames(scenario)) { + warn("The column `production` has been removed from the dataset `scenario`. + The columns `tmsr` and `smsp` will be used instead", + class = "scenario_production_column_removed") + scenario <- dplyr::select(scenario, -all_of("production")) + return(scenario) + } + data <- join_abcd_scenario( data, abcd, @@ -109,14 +122,6 @@ target_market_share <- function(data, return(empty_target_market_share_output()) } - if ("production" %in% colnames(scenario)) { - warn("The column `production` has been removed from the dataset `scenario`. - The columns `tmsr` and `smsp` will be used instead", - class = "scenario_production_column_removed") - scenario <- dplyr::select(scenario, -all_of("production")) - return(scenario) - } - crucial_groups <- c( "id_loan", "loan_size_outstanding", diff --git a/tests/testthat/_snaps/join_abcd_scenario.md b/tests/testthat/_snaps/join_abcd_scenario.md index 2f41a763..a8029a9d 100644 --- a/tests/testthat/_snaps/join_abcd_scenario.md +++ b/tests/testthat/_snaps/join_abcd_scenario.md @@ -10,7 +10,7 @@ # i abbreviated name: 1: loan_size_outstanding_currency # i 18 more variables: loan_size_credit_limit_currency , id_2dii , # level , score , sector , name_abcd , sector_abcd , - # technology , year , production , emission_factor , - # plant_location , is_ultimate_owner , scenario , - # region , tmsr , smsp , scenario_source + # technology , plant_location , is_ultimate_owner , + # scenario , region , scenario_source , year , + # production , emission_factor , tmsr , smsp diff --git a/tests/testthat/test-join_abcd_scenario.R b/tests/testthat/test-join_abcd_scenario.R index 746013ca..4f1dc955 100644 --- a/tests/testthat/test-join_abcd_scenario.R +++ b/tests/testthat/test-join_abcd_scenario.R @@ -117,8 +117,8 @@ test_that("outputs a number of rows equal to matches by `scenario_source`", { matching_2 <- join_abcd_scenario( fake_matched(), abcd = fake_abcd(plant_location = "a"), - scenario = fake_scenario(region = "b", scenario_source = c("c", "c")), - region_isos = tibble(isos = "a", region = "b", source = "c") + scenario = fake_scenario(region = "b", scenario_source = c("c", "d")), + region_isos = tibble(isos = "a", region = "b", source = c("c", "d")) ) expect_equal(nrow(matching_2), 2L) }) @@ -217,7 +217,7 @@ test_that("warns 0-rows caused by scenario or region_isos", { test_that("include/excludes `plant_location` inside/outside a region", { # styler: off - region_isos_toy <- tribble( + region_isos_toy <- dplyr::tribble( ~region, ~isos, ~source, "north america", "us", "demo_2020", "oecd", "de", "demo_2020", @@ -259,3 +259,60 @@ test_that("outputs the same with upper/lower abcd$sector or abcd$technology", { out_upper <- join_abcd_scenario(matched, upper_technology, scenario, regions) expect_equal(out_upper, out_lower) }) + +test_that("outputs full timeline of scenario #157", { + + out <- join_abcd_scenario( + fake_matched(), + fake_abcd(year = 2020), + fake_scenario(scenario = "1.5c-scen", year = c(2020, 2025)), + region_isos = region_isos_stable + ) + + expect_equal(max(out$year), 2025L) + +}) + +test_that("doesnt output sectors that aren't in input data #157", { + + out <- join_abcd_scenario( + fake_matched(sector_abcd = "power"), + fake_abcd(sector = "power", technology = "hydrocap"), + fake_scenario( + sector = c("power", "automotive"), + technology = c("hydrocap", "ice") + ), + region_isos = region_isos_stable + ) + + expect_equal(unique(out$sector_abcd), "power") + +}) + +test_that("only extend timeline beyond t0 of abcd #157", { + + out <- join_abcd_scenario( + fake_matched(name_abcd = c("a", "b")), + fake_abcd( + name_company = c("a", "b"), + year = c(2020, 2021) + ), + fake_scenario(year = c(2020, 2021)), + region_isos = region_isos_stable + ) + + out_a <- filter(out, name_abcd == "a") + + out_b <- filter(out, name_abcd == "b") + + expect_equal(max(out_a$year), 2021L) + expect_equal(min(out_b$year), 2021L) + + out_a <- split(out_a, out_a$year) + out_b <- split(out_b, out_b$year) + + expect_equal(out_a$`2020`$production, 1) + expect_equal(out_a$`2021`$production, NA_real_) + expect_equal(out_b$`2021`$production, 1) + +}) diff --git a/tests/testthat/test-summarize_weighted_production.R b/tests/testthat/test-summarize_weighted_production.R index e72f8b4e..60d4f344 100644 --- a/tests/testthat/test-summarize_weighted_production.R +++ b/tests/testthat/test-summarize_weighted_production.R @@ -67,7 +67,6 @@ test_that("with NAs in crucial columns errors with informative message", { expect_error_crucial_NAs("id_loan") expect_error_crucial_NAs("loan_size_outstanding") expect_error_crucial_NAs("loan_size_credit_limit", use_credit_limit = TRUE) - expect_error_crucial_NAs("production") expect_error_crucial_NAs("sector_abcd") expect_error_crucial_NAs("technology") expect_error_crucial_NAs("year") @@ -260,7 +259,6 @@ test_that("with NAs in crucial columns errors with informative message", { expect_error_crucial_NAs("id_loan") expect_error_crucial_NAs("loan_size_outstanding") expect_error_crucial_NAs("loan_size_credit_limit", use_credit_limit = TRUE) - expect_error_crucial_NAs("production") expect_error_crucial_NAs("sector_abcd") expect_error_crucial_NAs("technology") expect_error_crucial_NAs("year") diff --git a/tests/testthat/test-target_market_share.R b/tests/testthat/test-target_market_share.R index 5637cc71..ca772404 100644 --- a/tests/testthat/test-target_market_share.R +++ b/tests/testthat/test-target_market_share.R @@ -1442,3 +1442,17 @@ test_that("correctly splits scenario names with hyphen #425", { expect_equal(unique(out$metric), "target_1.5c-scen") }) + +test_that("outputs `target` for full timeline of scenario #157", { + + out <- target_market_share( + fake_matched(), + fake_abcd(year = 2020), + fake_scenario(scenario = "1.5c-scen", year = c(2020, 2025)), + region_isos_stable + ) %>% + filter(grepl("target", metric)) + + expect_equal(max(out$year), 2025L) + +}) diff --git a/vignettes/production-percent-change.Rmd b/vignettes/production-percent-change.Rmd index 9cbfb2fa..10a56d28 100644 --- a/vignettes/production-percent-change.Rmd +++ b/vignettes/production-percent-change.Rmd @@ -34,7 +34,8 @@ master <- loanbook_demo %>% join_abcd_scenario( abcd = abcd_demo, scenario = scenario_demo_2020, - region_isos = region_isos_demo + region_isos = region_isos_demo, + add_green_technologies = FALSE ) summarize_weighted_production(master)