From ecda8501aa841943acf1d5a90a7d13aa97a686f7 Mon Sep 17 00:00:00 2001
From: Jackson Hoffart <jackson.hoffart@gmail.com>
Date: Mon, 25 Mar 2024 18:01:36 +0100
Subject: [PATCH] feat: `target_market_share` gains extended time-horizon for
 `target_*` outputs, to entire time-horizon of input `scenario` (#481)

---
 R/join_abcd_scenario.R                        | 76 ++++++++++++++++---
 R/summarize_weighted_production.R             | 30 ++++++--
 R/target_market_share.R                       | 25 +++---
 tests/testthat/_snaps/join_abcd_scenario.md   |  6 +-
 tests/testthat/test-join_abcd_scenario.R      | 63 ++++++++++++++-
 .../test-summarize_weighted_production.R      |  2 -
 tests/testthat/test-target_market_share.R     | 14 ++++
 vignettes/production-percent-change.Rmd       |  3 +-
 8 files changed, 182 insertions(+), 37 deletions(-)

diff --git a/R/join_abcd_scenario.R b/R/join_abcd_scenario.R
index 6c0d6fc2..6c6d8b39 100644
--- a/R/join_abcd_scenario.R
+++ b/R/join_abcd_scenario.R
@@ -53,10 +53,63 @@ join_abcd_scenario <- function(data,
     abcd <- add_green_technologies_to_abcd(abcd, scenario)
   }
 
+  abcd <- abcd %>%
+    arrange(.data[["year"]]) %>%
+    mutate(
+      .start_year_abcd = first(.data[["year"]]),
+      .by = c("name_company", "sector", "plant_location")
+      )
+
+  scenario <- scenario %>%
+    arrange(.data[["year"]]) %>%
+    mutate(
+      .start_year_scenario = first(.data[["year"]]),
+      .by = c("sector")
+    )
+
+  abcd_wide <- tidyr::pivot_wider(
+    abcd,
+    names_from = "year",
+    names_sep = "~",
+    values_from = c("production", "emission_factor")
+  )
+
+  scenario_wide <- tidyr::pivot_wider(
+    scenario,
+    names_from = "year",
+    names_sep = "~",
+    values_from = c("tmsr", "smsp")
+  )
+
+  data <- data %>%
+    left_join(abcd_wide, by = abcd_columns(), relationship = "many-to-many") %>%
+    left_join(scenario_wide, by = scenario_columns(), relationship = "many-to-many") %>%
+    warn_if_has_zero_rows("Joining `scenario` outputs 0 rows.")
+
+  data <- tidyr::pivot_longer(
+    data,
+    cols = tidyr::contains("~"),
+    names_sep = "~",
+    names_to = c("metric", "year")
+  )
+
+  data <- dplyr::mutate(data, year = as.integer(.data[["year"]]))
+
+  data <- tidyr::pivot_wider(
+    data,
+    names_from = "metric",
+    values_from = "value"
+  )
+
+  data <- data %>%
+    dplyr::filter(.data[["year"]] >= .data[[".start_year_abcd"]]) %>%
+    dplyr::filter(.data[["year"]] >= .data[[".start_year_scenario"]]) %>%
+    dplyr::mutate(
+      .start_year_abcd = NULL,
+      .start_year_scenario = NULL
+      )
+
   out <- data %>%
-    left_join(abcd, by = abcd_columns(), relationship = "many-to-many") %>%
-    inner_join(scenario, by = scenario_columns(), relationship = "many-to-many") %>%
-    warn_if_has_zero_rows("Joining `scenario` outputs 0 rows.") %>%
     mutate(plant_location = tolower(.data$plant_location)) %>%
     inner_join(
       region_isos,
@@ -76,17 +129,21 @@ check_portfolio_abcd_scenario <- function(valid_matches, abcd, scenario) {
   walk_(names(abcd_columns()), ~ check_no_value_is_missing(valid_matches, .x))
 
   check_crucial_names(
-    abcd, c("name_company", "plant_location", unname(scenario_columns()))
+    abcd,
+    c("name_company", "plant_location", "year", unname(scenario_columns()))
   )
   walk_(
-    c("name_company", unname(scenario_columns())),
+    c("name_company", "year", unname(scenario_columns())),
     ~ check_no_value_is_missing(abcd, .x)
   )
 
 
-  check_crucial_names(scenario, c(scenario_columns(), "scenario_source", "region"))
+  check_crucial_names(
+    scenario,
+    c(scenario_columns(), "scenario_source", "region", "year")
+    )
   walk_(
-    c(scenario_columns(), "scenario_source", "region"),
+    c(scenario_columns(), "scenario_source", "region", "year"),
     ~ check_no_value_is_missing(scenario, .x)
   )
 
@@ -105,7 +162,7 @@ add_green_technologies_to_abcd <- function(data, scenario) {
 
   increasing_techs_not_in_abcd <- dplyr::filter(
     increasing_techs_in_scenario,
-    !(technology %in% unique(data$technology))
+    !(.data[["technology"]] %in% unique(data$technology))
   )
 
   green_rows_to_add <- data %>%
@@ -137,7 +194,6 @@ abcd_columns <- function() {
 scenario_columns <- function() {
   c(
     sector_abcd = "sector",
-    technology = "technology",
-    year = "year"
+    technology = "technology"
   )
 }
diff --git a/R/summarize_weighted_production.R b/R/summarize_weighted_production.R
index 04a17b89..fab668f2 100644
--- a/R/summarize_weighted_production.R
+++ b/R/summarize_weighted_production.R
@@ -58,13 +58,13 @@ summarize_weighted_production_ <- function(data, ..., use_credit_limit = FALSE,
 
   old_groups <- dplyr::groups(data)
 
-  crucial <- c("production", "sector_abcd", "year", "technology")
+  crucial <- c("sector_abcd", "year", "technology")
 
   if (with_targets) {
     crucial <- c(crucial, "production_target")
   }
 
-  check_crucial_names(data, crucial)
+  check_crucial_names(data, c("production", crucial))
   walk_(crucial, ~ check_no_value_is_missing(data, .x))
 
   data <- data %>%
@@ -187,10 +187,24 @@ summarize_unweighted_emission_factor <- function(data, ...) {
 }
 
 calculate_weighted_loan_metric <- function(data, metric) {
-  crucial <- c(metric, "loan_weight")
 
-  check_crucial_names(data, crucial)
-  walk_(crucial, ~ check_no_value_is_missing(data, .x))
+  check_crucial_names(data, c(metric, "loan_weight"))
+
+  allowed_missing_vals <- c(
+    "production",
+    "production_target",
+    "technology_share",
+    "technology_share_target",
+    "percent_change"
+  )
+
+  if (metric %in% allowed_missing_vals) {
+    no_missing_vals <- "loan_weight"
+  } else {
+    no_missing_vals <- c("loan_weight", metric)
+  }
+
+  walk_(no_missing_vals, ~ check_no_value_is_missing(data, .x))
 
   data %>%
     mutate(
@@ -234,9 +248,9 @@ add_loan_weight <- function(data, use_credit_limit) {
 }
 
 add_percent_change <- function(data) {
-  crucial <- c("production", "sector_abcd", "year", "technology", "scenario")
+  crucial <- c("sector_abcd", "year", "technology", "scenario")
 
-  check_crucial_names(data, crucial)
+  check_crucial_names(data, c("production", crucial))
   walk_(crucial, ~ check_no_value_is_missing(data, .x))
 
   check_zero_initial_production(data)
@@ -303,7 +317,7 @@ add_technology_share_target <- function(data) {
 
 check_zero_initial_production <- function(data) {
   companies_with_zero_initial_production <- data %>%
-    group_by(.data$technology, .data$name_abcd, .data$year) %>%
+    group_by(.data$technology, .data$name_abcd) %>%
     arrange(.data$year) %>%
     filter(.data$year == first(.data$year)) %>%
     summarize(production_at_start_year = sum(.data$production)) %>%
diff --git a/R/target_market_share.R b/R/target_market_share.R
index 7645da91..33b42dab 100644
--- a/R/target_market_share.R
+++ b/R/target_market_share.R
@@ -85,8 +85,6 @@ target_market_share <- function(data,
     is.logical(weight_production)
   )
 
-  abcd <- fill_and_warn_na(abcd, "production")
-
   region_isos <- change_to_lowercase_and_warn(region_isos, "isos")
 
   warn_if_by_company_and_weight_production(by_company, weight_production)
@@ -95,8 +93,23 @@ target_market_share <- function(data,
 
   check_input_for_crucial_columns(data, abcd, scenario)
 
+  abcd <- fill_and_warn_na(abcd, "production")
+  abcd <- dplyr::summarize(
+    abcd,
+    production = sum(.data[["production"]]),
+    .by = -"production"
+  )
+
   data <- aggregate_by_name_abcd(data)
 
+  if ("production" %in% colnames(scenario)) {
+    warn("The column `production` has been removed from the dataset `scenario`.
+         The columns `tmsr` and `smsp` will be used instead",
+         class = "scenario_production_column_removed")
+    scenario <- dplyr::select(scenario, -all_of("production"))
+    return(scenario)
+  }
+
   data <- join_abcd_scenario(
     data,
     abcd,
@@ -109,14 +122,6 @@ target_market_share <- function(data,
     return(empty_target_market_share_output())
   }
 
-  if ("production" %in% colnames(scenario)) {
-    warn("The column `production` has been removed from the dataset `scenario`.
-         The columns `tmsr` and `smsp` will be used instead",
-         class = "scenario_production_column_removed")
-    scenario <- dplyr::select(scenario, -all_of("production"))
-    return(scenario)
-  }
-
   crucial_groups <- c(
     "id_loan",
     "loan_size_outstanding",
diff --git a/tests/testthat/_snaps/join_abcd_scenario.md b/tests/testthat/_snaps/join_abcd_scenario.md
index 2f41a763..a8029a9d 100644
--- a/tests/testthat/_snaps/join_abcd_scenario.md
+++ b/tests/testthat/_snaps/join_abcd_scenario.md
@@ -10,7 +10,7 @@
       # i abbreviated name: 1: loan_size_outstanding_currency
       # i 18 more variables: loan_size_credit_limit_currency <chr>, id_2dii <chr>,
       #   level <chr>, score <dbl>, sector <chr>, name_abcd <chr>, sector_abcd <chr>,
-      #   technology <chr>, year <dbl>, production <dbl>, emission_factor <dbl>,
-      #   plant_location <chr>, is_ultimate_owner <lgl>, scenario <chr>,
-      #   region <chr>, tmsr <dbl>, smsp <dbl>, scenario_source <chr>
+      #   technology <chr>, plant_location <chr>, is_ultimate_owner <lgl>,
+      #   scenario <chr>, region <chr>, scenario_source <chr>, year <int>,
+      #   production <dbl>, emission_factor <dbl>, tmsr <dbl>, smsp <dbl>
 
diff --git a/tests/testthat/test-join_abcd_scenario.R b/tests/testthat/test-join_abcd_scenario.R
index 746013ca..4f1dc955 100644
--- a/tests/testthat/test-join_abcd_scenario.R
+++ b/tests/testthat/test-join_abcd_scenario.R
@@ -117,8 +117,8 @@ test_that("outputs a number of rows equal to matches by `scenario_source`", {
   matching_2 <- join_abcd_scenario(
     fake_matched(),
     abcd = fake_abcd(plant_location = "a"),
-    scenario = fake_scenario(region = "b", scenario_source = c("c", "c")),
-    region_isos = tibble(isos = "a", region = "b", source = "c")
+    scenario = fake_scenario(region = "b", scenario_source = c("c", "d")),
+    region_isos = tibble(isos = "a", region = "b", source = c("c", "d"))
   )
   expect_equal(nrow(matching_2), 2L)
 })
@@ -217,7 +217,7 @@ test_that("warns 0-rows caused by scenario or region_isos", {
 
 test_that("include/excludes `plant_location` inside/outside a region", {
   # styler: off
-  region_isos_toy <- tribble(
+  region_isos_toy <- dplyr::tribble(
     ~region,         ~isos, ~source,
     "north america", "us",  "demo_2020",
     "oecd",          "de",  "demo_2020",
@@ -259,3 +259,60 @@ test_that("outputs the same with upper/lower abcd$sector or abcd$technology", {
   out_upper <- join_abcd_scenario(matched, upper_technology, scenario, regions)
   expect_equal(out_upper, out_lower)
 })
+
+test_that("outputs full timeline of scenario #157", {
+
+  out <- join_abcd_scenario(
+    fake_matched(),
+    fake_abcd(year = 2020),
+    fake_scenario(scenario = "1.5c-scen", year = c(2020, 2025)),
+    region_isos = region_isos_stable
+  )
+
+  expect_equal(max(out$year), 2025L)
+
+})
+
+test_that("doesnt output sectors that aren't in input data #157", {
+
+  out <- join_abcd_scenario(
+    fake_matched(sector_abcd = "power"),
+    fake_abcd(sector = "power", technology = "hydrocap"),
+    fake_scenario(
+      sector = c("power", "automotive"),
+      technology = c("hydrocap", "ice")
+      ),
+    region_isos = region_isos_stable
+  )
+
+  expect_equal(unique(out$sector_abcd), "power")
+
+})
+
+test_that("only extend timeline beyond t0 of abcd #157", {
+
+  out <- join_abcd_scenario(
+    fake_matched(name_abcd = c("a", "b")),
+    fake_abcd(
+      name_company = c("a", "b"),
+      year = c(2020, 2021)
+      ),
+    fake_scenario(year = c(2020, 2021)),
+    region_isos = region_isos_stable
+  )
+
+  out_a <- filter(out, name_abcd == "a")
+
+  out_b <- filter(out, name_abcd == "b")
+
+  expect_equal(max(out_a$year), 2021L)
+  expect_equal(min(out_b$year), 2021L)
+
+  out_a <- split(out_a, out_a$year)
+  out_b <- split(out_b, out_b$year)
+
+  expect_equal(out_a$`2020`$production, 1)
+  expect_equal(out_a$`2021`$production, NA_real_)
+  expect_equal(out_b$`2021`$production, 1)
+
+})
diff --git a/tests/testthat/test-summarize_weighted_production.R b/tests/testthat/test-summarize_weighted_production.R
index e72f8b4e..60d4f344 100644
--- a/tests/testthat/test-summarize_weighted_production.R
+++ b/tests/testthat/test-summarize_weighted_production.R
@@ -67,7 +67,6 @@ test_that("with NAs in crucial columns errors with informative message", {
   expect_error_crucial_NAs("id_loan")
   expect_error_crucial_NAs("loan_size_outstanding")
   expect_error_crucial_NAs("loan_size_credit_limit", use_credit_limit = TRUE)
-  expect_error_crucial_NAs("production")
   expect_error_crucial_NAs("sector_abcd")
   expect_error_crucial_NAs("technology")
   expect_error_crucial_NAs("year")
@@ -260,7 +259,6 @@ test_that("with NAs in crucial columns errors with informative message", {
   expect_error_crucial_NAs("id_loan")
   expect_error_crucial_NAs("loan_size_outstanding")
   expect_error_crucial_NAs("loan_size_credit_limit", use_credit_limit = TRUE)
-  expect_error_crucial_NAs("production")
   expect_error_crucial_NAs("sector_abcd")
   expect_error_crucial_NAs("technology")
   expect_error_crucial_NAs("year")
diff --git a/tests/testthat/test-target_market_share.R b/tests/testthat/test-target_market_share.R
index 5637cc71..ca772404 100644
--- a/tests/testthat/test-target_market_share.R
+++ b/tests/testthat/test-target_market_share.R
@@ -1442,3 +1442,17 @@ test_that("correctly splits scenario names with hyphen #425", {
   expect_equal(unique(out$metric), "target_1.5c-scen")
 
 })
+
+test_that("outputs `target` for full timeline of scenario #157", {
+
+  out <- target_market_share(
+    fake_matched(),
+    fake_abcd(year = 2020),
+    fake_scenario(scenario = "1.5c-scen", year = c(2020, 2025)),
+    region_isos_stable
+  ) %>%
+    filter(grepl("target", metric))
+
+  expect_equal(max(out$year), 2025L)
+
+})
diff --git a/vignettes/production-percent-change.Rmd b/vignettes/production-percent-change.Rmd
index 9cbfb2fa..10a56d28 100644
--- a/vignettes/production-percent-change.Rmd
+++ b/vignettes/production-percent-change.Rmd
@@ -34,7 +34,8 @@ master <- loanbook_demo %>%
   join_abcd_scenario(
     abcd = abcd_demo,
     scenario = scenario_demo_2020,
-    region_isos = region_isos_demo
+    region_isos = region_isos_demo,
+    add_green_technologies = FALSE
   )
 
 summarize_weighted_production(master)