diff --git a/DESCRIPTION b/DESCRIPTION index 5fcfa052..c02f8f88 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: IEATools Type: Package Title: Tools for Working with International Energy Agency Data -Version: 0.1.75 -Date: 2024-02-03 +Version: 0.1.76 +Date: 2024-12-09 Authors@R: c(person(given = "Matthew Kuperus", family = "Heun", role = c("aut", "cre"), email = "matthew.heun@me.com", comment = c(ORCID = "0000-0002-7438-214X")), @@ -15,7 +15,7 @@ License: MIT + file LICENSE Language: en-US Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) Depends: R (>= 2.10) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index f9ff9bdd..6ad22b17 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(gather_producer_autoproducer) export(iea_df) export(iea_file_OK) export(insert_after) +export(load_electricity_heat_output) export(load_eta_fu_data) export(load_fu_allocation_data) export(load_phi_constants_table) @@ -72,10 +73,13 @@ export(slurp_iea_to_raw_df) export(sort_iea_df) export(specify_all) export(specify_bunkers) +export(specify_distribution_losses) +export(specify_electricity_grid) export(specify_interface_industries) export(specify_non_energy_use) export(specify_primary_production) export(specify_production_to_resources) +export(specify_renewable_plants) export(specify_tp_eiou) export(split_oil_gas_extraction_eiou) export(stack_final_useful_df) diff --git a/NEWS.md b/NEWS.md index dae75b62..d0401636 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,34 @@ Cite all releases with doi [10.5281/zenodo.5086371](https://doi.org/10.5281/zeno which always resolves to the latest release. -## IEATools 0.1.75 (2024-02-03) +## IEATools 0.1.76 (2024-12-09) + +* New function `load_electricity_heat_output()` + creates a data frame of IEA electricity and heat output + information. +* Change names of columns to avoid PostgreSQL + database conflicts. 
+ PostgreSQL doesn't like column names containing "." + because "." is the separator for "schema.table". +* New `specify_distribution_losses()` function + to specify distribution industries, within which transportation and + distribution losses occur. +* Argument added to `add_nuclear_industry()` to ascribe some EIOU + to the nuclear industry according to its output share. +* New `specify_renewable_plants()` function + to specify renewable energy plants. +* New `specify_electricity_grid()` function + to add an electricity grid. +* The `.iea_file` argument to `slurp_iea_to_raw_df()` + is now vectorized, which will enable sending + a vector of country IEA data files in `.iea_file`. +* Fixed several tests for new column names and added + a couple new tests for new features. + * Now at 1365 tests, all passing. + * Test coverage remains at 100%. + + +## IEATools 0.1.75 (2024-02-03) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10613604.svg)](https://doi.org/10.5281/zenodo.10613604) * `extend_to_useful_helper()` and `extend_to_useful()` now return additional matrices, namely diff --git a/R/data.R b/R/data.R index 8c0c31ac..9d0c624e 100644 --- a/R/data.R +++ b/R/data.R @@ -127,7 +127,7 @@ #' \item{C_eiou}{The name of the EIOU allocation rows in final-to-useful templates.} #' \item{C_Y}{The name of the final demand allocation rows in final-to-useful templates.} #' \item{C_perc}{The name of the percentage allocation rows in final-to-useful templates.} -#' \item{e_dot_max}{The name of the maximum E.dot column in final-to-useful templates.} +#' \item{e_dot_max}{The name of the maximum Edot column in final-to-useful templates.} #' \item{e_dot_dest}{The name of the column representing the destination for energy flows in final-to-useful templates.} #' \item{e_dot_perc}{The name of the energy flow percentage column in final-to-useful templates.} #' \item{e_dot_machine}{The name of the column representing energy flow into a machine in final-to-useful templates.} @@ -181,6 
+181,7 @@ #' \item{sector}{The type of entity that absorbs final demand.} #' \item{product}{The inputs and outputs of industries.} #' \item{unit}{Units of physical measurement such as ktoe or TJ.} +#' \item{other}{Used for, e.g., the 1-dimension of a vector.} #' } #' #' @examples @@ -478,6 +479,34 @@ "electricity_products" +#' Heat products +#' +#' A string vector containing names of products classified by the IEA as heat products. +#' +#' @format A string vector with `r length(heat_products)` entries. +#' \describe{ +#' \item{heat}{The string identifier for Heat.} +#' } +#' +#' @examples +#' heat_products +"heat_products" + + +#' Nuclear products +#' +#' A string vector containing names of products classified by the IEA as nuclear products. +#' +#' @format A string vector with `r length(nuclear_products)` entries. +#' \describe{ +#' \item{nuclear}{The string identifier for Nuclear} +#' } +#' +#' @examples +#' nuclear_products +"nuclear_products" + + #' Non-energy products #' #' A string vector containing names of products classified as "Non-energy" by @@ -1149,8 +1178,8 @@ #' \item{phi_constants_names}{The string name of the tab in the Excel file containing the constant phi values table.} #' \item{product_colname}{The string name of the energy product column in the constant phi values table.} #' \item{phi_colname}{The string name of the constant phi value column in the constant phi values table.} -#' \item{is_useful_colname}{The string name of the is.useful column in the constant phi values table.} -#' \item{phi_source_colname}{The string name of the phi.source column in the completed phi values table.} +#' \item{is_useful_colname}{The string name of the IsUseful column in the constant phi values table.} +#' \item{phi_source_colname}{The string name of the PhiSource column in the completed phi values table.} #' } #' #' @examples @@ -1311,4 +1340,67 @@ "Fixed_RUSEST_heat" +#' Renewable energy industries names +#' +#' A string list containing the names of renewable 
industries added with the `specify_renewable_plants()` function. +#' +#' @format A string list with `r length(renewable_industries)` entries. +#' \describe{ +#' \item{geothermal_plants}{The string name of geothermal plants.} +#' \item{hydro_plants}{The string name of hydropower plants.} +#' \item{solar_pv_plants}{The string name of solar photovoltaics plants.} +#' \item{solar_th_plants}{The string name of solar thermal plants.} +#' \item{oceanic_plants}{The string name of oceanic power plants.} +#' \item{wind_power_plants}{The string name of wind power plants.} +#' } +#' +#' @examples +#' renewable_industries +"renewable_industries" + +#' Grid industries names +#' +#' A string list containing the names of the grid industries that can be added. +#' +#' @format A string list with `r length(grid_industries)` entries. +#' \describe{ +#' \item{electricity_grid}{The string name of the electricity grid industry.} +#' } +#' +#' @examples +#' grid_industries +"grid_industries" + + +#' Distribution industry name +#' +#' A character string containing the name of the distribution industry +#' +#' @format A character string +#' \describe{ +#' A character string containing the name of the distribution industry +#' } +#' +#' @examples +#' distribution_industry +"distribution_industry" + + +#' Electricity and heat output names +#' +#' A character vector containing the prefixes for +#' electricity and heat outputs. 
+#' +#' @format A string list with `r length(elec_heat_output)` entries. +#' \describe{ +#' \item{electricity_output_prefix}{The string prefix for electricity output.} +#' \item{heat_output_prefix}{The string prefix for heat output.} +#' \item{output_machine_delimiter}{The string delimiter between output energy flow and the machine name.} +#' \item{input_product}{The name of the input product column.} +#' \item{output_product}{The name of the output product column.} +#' } +#' +#' @examples +#' elec_heat_output +"elec_heat_output" diff --git a/R/electricity_heat_output.R b/R/electricity_heat_output.R new file mode 100644 index 00000000..c7be2be5 --- /dev/null +++ b/R/electricity_heat_output.R @@ -0,0 +1,85 @@ +#' Extract a data frame of electricity and heat output information +#' +#' @param .iea_file The IEA data file to read +#' @param electricity_output_prefix The prefix for electricity output rows. +#' Default is `IEATools::elec_heat_output$electricity_output_prefix`. +#' @param heat_output_prefix The prefix for heat output rows. +#' Default is `IEATools::elec_heat_output$heat_output_prefix`. +#' @param country,year,flow,product,e_dot,unit_colname See `IEATools::iea_cols`. +#' @param input_colname,output_colname,output_machine_delimiter See `IEATools::elec_heat_output`. +#' @param machine_colname See `IEATools::template_cols`. +#' @param unit The desired output unit. Default is "TJ". Best not to change this. +#' @param electricity A string that defines electricity output. +#' Default is "Electricity". +#' @param total See `IEATools::memo_aggregation_product_prefixes`. +#' @param memo See `IEATools::memo_aggregation_flow_prefixes`. +#' +#' @return A data frame of electricity and heat output data. 
+#' +#' @export +#' +#' @examples +#' sample_iea_data_path() |> +#' load_electricity_heat_output() +load_electricity_heat_output <- function(.iea_file = NULL, + electricity_output_prefix = IEATools::elec_heat_output$electricity_output_prefix, + heat_output_prefix = IEATools::elec_heat_output$heat_output_prefix, + country = IEATools::iea_cols$country, + year = IEATools::iea_cols$year, + flow = IEATools::iea_cols$flow, + product = IEATools::iea_cols$product, + e_dot = IEATools::iea_cols$e_dot, + input_colname = IEATools::elec_heat_output$input_product, + output_colname = IEATools::elec_heat_output$output_product, + machine_colname = IEATools::template_cols$machine, + unit_colname = IEATools::iea_cols$unit, + unit = "TJ", + electricity = "Electricity", + output_machine_delimiter = IEATools::elec_heat_output$output_machine_delimiter, + total = IEATools::memo_aggregation_product_prefixes$total, + memo = IEATools::memo_aggregation_flow_prefixes$memo) { + + iea_data <- .iea_file |> + iea_df() |> + rename_iea_df_cols() |> + clean_iea_whitespace() |> + use_iso_countries() + elec_heat_data <- iea_data |> + dplyr::filter((startsWith(.data[[flow]], electricity_output_prefix) | + startsWith(.data[[flow]], heat_output_prefix)), + .data[[product]] != total, + !startsWith(.data[[product]], memo)) |> + tidyr::separate_wider_delim(dplyr::all_of(flow), delim = output_machine_delimiter, names = c(output_colname, machine_colname)) |> + dplyr::mutate( + # Capitalize first letter of machine name. 
+ "{machine_colname}" := stringr::str_to_sentence(.data[[machine_colname]]), + # Select only the first word in the output column, either "Electricity" or "Heat" + "{output_colname}" := stringr::word(.data[[output_colname]], 1), + # Restore the CHP acronym, which str_to_sentence() converted to lower case. + "{machine_colname}" := stringr::str_replace(.data[[machine_colname]], "chp", "CHP") + ) |> + dplyr::rename( + "{input_colname}" := dplyr::all_of(product) + ) |> + tidyr::pivot_longer(cols = !dplyr::all_of(c(country, output_colname, machine_colname, input_colname)), + names_to = year, + values_to = e_dot) |> + dplyr::filter(.data[[e_dot]] != 0) |> + dplyr::mutate( + # Convert GWh to TJ for electricity only. + "{e_dot}" := dplyr::case_when( + # 1 GWh = 3.6 TJ. + .data[[output_colname]] == electricity ~ .data[[e_dot]] * 3.6, + # Heat is already in TJ. + TRUE ~ .data[[e_dot]] + ), + # Set the unit string. This is only a label; values are always converted as above. + "{unit_colname}" := unit, + # Year should be numeric + "{year}" := as.numeric(.data[[year]]) + ) |> + # Update the order of columns. + dplyr::select(dplyr::all_of(c(country, year, input_colname, machine_colname, output_colname, e_dot, unit_colname))) + + return(elec_heat_data) +} \ No newline at end of file diff --git a/R/energy_balance.R b/R/energy_balance.R index 5f084402..9a6522cb 100644 --- a/R/energy_balance.R +++ b/R/energy_balance.R @@ -8,7 +8,7 @@ #' before calling this function. #' Grouping should _definitely_ be done on the `Product` column. #' Typically, grouping is also done on -#' `Country`, `Method`, `Year`, `Energy.type`, `Last.stage`, etc. columns. +#' `Country`, `Method`, `Year`, `EnergyType`, `Last.stage`, etc. columns. #' Grouping should _not_ be done on the `Ledger.side` column or the `Flow` column. #' To test whether all balances are OK, #' use the `tidy_iea_df_balanced()` function. 
@@ -167,7 +167,7 @@ tidy_iea_df_balanced <- function(.tidy_iea_df_balances, #' The `Product` column should definitely be included in `grouping_vars`, #' but any other grouping level is fine. 
#' Typically, grouping should be done by -#' `Country`, `Year`, `Energy.type`, `Last.stage`, `Product`, etc. columns. +#' `Country`, `Year`, `EnergyType`, `LastStage`, `Product`, etc. columns. #' Grouping should _not_ be done on the `flow_aggregation_point`, `Flow`, or `ledger_side` columns. #' #' Internally, this function calls [calc_tidy_iea_df_balances()] @@ -202,7 +202,7 @@ tidy_iea_df_balanced <- function(.tidy_iea_df_balances, #' # Remember that grouping should _not_ be done on #' # the `flow_aggregation_point`, `Flow`, or `ledger_side` columns. #' grouped_iea_df <- load_tidy_iea_df() %>% -#' group_by(Country, Method, Energy.type, Last.stage, Year, Product) +#' group_by(Country, Method, EnergyType, LastStage, Year, Product) #' # unbalanced will not be balanced, because the IEA data are not in perfect balance. #' # Because we have grouped by key variables, #' # `calc_tidy_iea_df_balances` provides energy balances @@ -296,4 +296,4 @@ fix_tidy_iea_df_balances <- function(.tidy_iea_df, dplyr::filter(!(.data[[e_dot]] == 0)) } return(out) -} \ No newline at end of file +} diff --git a/R/final_to_useful.R b/R/final_to_useful.R index b417977c..3fd1131c 100644 --- a/R/final_to_useful.R +++ b/R/final_to_useful.R @@ -6,9 +6,9 @@ #' This function uses information in a filled allocation template (created by `write_fu_allocation_template()`) #' to create allocation matrices (**C**). #' -#' rownames of the **C** matrices are taken from the `Ef.product` and `Destination` columns of `.fu_allocation_table` -#' and have the form "`Ef.product` `r RCLabels::arrow_notation[["pref_end"]]` `Destination`". -#' colnames of the **C** matrices are taken from the `Machine` and `Eu.product` columns of `.fu_allocation_table` +#' rownames of the **C** matrices are taken from the `EfProduct` and `Destination` columns of `.fu_allocation_table` +#' and have the form "`EfProduct` `r RCLabels::arrow_notation[["pref_end"]]` `Destination`". 
+#' colnames of the **C** matrices are taken from the `Machine` and `EuProduct` columns of `.fu_allocation_table` #' and have the form "machine `r RCLabels::arrow_notation[["pref_end"]]` useful energy form". #' #' **C** matrices are created for both energy industry own use @@ -150,9 +150,9 @@ form_C_mats <- function(.fu_allocation_table, prepped <- gathered %>% # Create row and column names. dplyr::mutate( - # Row names come from Ef.product -> Destination for both C_Y and C_EIOU. + # Row names come from EfProduct -> Destination for both C_Y and C_EIOU. "{rownames}" := RCLabels::paste_pref_suff(pref = .data[[ef_product]], suff = .data[[destination]], notation = notation), - # Column names come from Machine -> Eu.product for both C_Y and C_EIOU. + # Column names come from Machine -> EuProduct for both C_Y and C_EIOU. "{colnames}" := RCLabels::paste_pref_suff(pref = .data[[machine]], suff = .data[[eu_product]], notation = notation), # Row types are Product -> Industry # "{rowtypes}" := product, @@ -712,7 +712,8 @@ extend_to_useful <- function(.sutdata = NULL, # Get the detail Y_u matrix Y_fu_details_mat <- res_Y[[details_fu]] # Add a NULL U_EIOU_u_details matrix - U_eiou_fu_details_mat <- NULL + # U_eiou_fu_details_mat <- NULL + U_eiou_fu_details_mat <- matsbyname::hadamardproduct_byname(U_useful_mat, 0) # Now check to see if we have any EIOU. # If so, make further adjustments to the matrices. 
diff --git a/R/fixes.R b/R/fixes.R index 38503f4c..a5df8f05 100644 --- a/R/fixes.R +++ b/R/fixes.R @@ -52,10 +52,10 @@ #' # Compare production of Primary solid biofuels in 1991 #' example_tidy_iea_df %>% #' filter(Year == 1991, Flow == "Production") %>% -#' select("E.dot", "Unit") +#' select("Edot", "Unit") #' fixed %>% #' filter(Year == 1991, Flow == "Production") %>% -#' select("E.dot", "Unit") +#' select("Edot", "Unit") fix_GHA_psb <- function(.tidy_iea_df, country = IEATools::iea_cols$country, year = IEATools::iea_cols$year, @@ -111,12 +111,12 @@ fix_GHA_psb <- function(.tidy_iea_df, #' dplyr::filter(Flow %in% c("Main activity producer electricity plants", #' "Autoproducer electricity plants"), #' Product == "Electricity") %>% -#' dplyr::select("Year", "Flow", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Edot", "Unit") #' fixed %>% #' dplyr::filter(Flow %in% c("Main activity producer electricity plants", #' "Autoproducer electricity plants"), #' Product == "Electricity") %>% -#' dplyr::select("Year", "Flow", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Edot", "Unit") fix_COL_WRLD_electricity <- function(.tidy_iea_df, country = IEATools::iea_cols$country, year = IEATools::iea_cols$year, @@ -165,12 +165,12 @@ fix_COL_WRLD_electricity <- function(.tidy_iea_df, #' dplyr::filter(Flow %in% c("Production", #' "Charcoal production plants"), #' Product %in% c("Charcoal", "Primary solid biofuels")) |> -#' dplyr::select("Year", "Flow", "Product", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Product", "Edot", "Unit") #' fixed %>% #' dplyr::filter(Flow %in% c("Production", #' "Charcoal production plants"), #' Product %in% c("Charcoal", "Primary solid biofuels")) |> -#' dplyr::select("Year", "Flow", "Product", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Product", "Edot", "Unit") fix_OAMR_cpp <- function(.tidy_iea_df, country = IEATools::iea_cols$country, year = IEATools::iea_cols$year, @@ -219,12 +219,12 @@ fix_OAMR_cpp <- function(.tidy_iea_df, 
#' dplyr::filter(Flow %in% c("Production", #' "Gas works"), #' Product %in% c("Gas works gas", "Natural gas")) |> -#' dplyr::select("Year", "Flow", "Product", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Product", "Edot", "Unit") #' fixed %>% #' dplyr::filter(Flow %in% c("Production", #' "Gas works"), #' Product %in% c("Gas works gas", "Natural gas")) |> -#' dplyr::select("Year", "Flow", "Product", "E.dot", "Unit") +#' dplyr::select("Year", "Flow", "Product", "Edot", "Unit") fix_OAMR_gw <- function(.tidy_iea_df, country = IEATools::iea_cols$country, year = IEATools::iea_cols$year, diff --git a/R/initialize.R b/R/initialize.R index 28574251..e9a7141a 100644 --- a/R/initialize.R +++ b/R/initialize.R @@ -29,9 +29,17 @@ #' 2. Arrange the columns in the following order: "COUNTRY", "FLOW", "PRODUCT", followed by years. #' 3. Change to the unit (ktoe or TJ) desired. #' 4. Save the results in .csv format. (Saving may take a while.) +#' +#' This function is vectorized over `.iea_file`. #' -#' @param .iea_file The path to the raw IEA data file for which quality assurance is desired +#' @param .iea_file The path to the raw IEA data file for which quality assurance is desired. +#' Can be a vector of file paths, in which case +#' each file is loaded sequentially and stacked together +#' with [dplyr::bind_rows()]. #' @param text A string containing text to be parsed as an IEA file. +#' Can be a vector of text strings, in which case +#' each string is processed sequentially and stacked together +#' with [dplyr::bind_rows()]. #' @param expected_1st_line_start The expected start of the first line of `iea_file`. Default is ",,TIME". #' @param country The name of the country column. #' Default is "COUNTRY". 
@@ -71,86 +79,95 @@ slurp_iea_to_raw_df <- function(.iea_file = NULL, assertthat::assert_that(xor(!is.null(.iea_file), !is.null(text)), msg = "need to supply only one of .iea_file or text arguments to iea_df") if (!is.null(.iea_file)) { - conn <- file(.iea_file, open = "rt") # open file connection + conns <- lapply(.iea_file, FUN = function(this_iea_file) { + file(this_iea_file, open = "rt") # open file connection + }) + text <- rep_len("", length(.iea_file)) } else { # text has been provided, probably for testing purposes. - conn <- textConnection(text) + conns <- list(textConnection(text)) + text <- list(text) + .iea_file <- list(NULL) } - # Check if the first line has the simple format - first_two_lines <- conn %>% readLines(n = 2) - close(conn) - assertthat::assert_that(length(first_two_lines) == 2, msg = "couldn't read 2 lines in iea_df") - # first_line <- first_two_lines[[1]] - # second_line <- first_two_lines[[2]] - # Eliminate any quotes that are present - first_line <- gsub(pattern = '\\"', replacement = "", x = first_two_lines[[1]]) - second_line <- gsub(pattern = '\\"', replacement = "", x = first_two_lines[[2]]) - # Ensure that we have an expected format for the first line or two in first_two_lines. - assertthat::assert_that(first_line %>% startsWith(expected_simple_start) | - (first_line %>% startsWith(expected_1st_line_start) & second_line %>% startsWith(expected_2nd_line_start)), - msg = paste0(".iea_file must start with ", - "first line: '", expected_simple_start, "', ", - "or ", - "first line: '", expected_1st_line_start, "' and ", - "second line: '", expected_2nd_line_start, "'. ", - "Instead, found ", - "first line: '", first_line, "', ", - "second line: '", second_line, "'.")) - if (first_line %>% startsWith(expected_simple_start)) { - # We have the simple start to the file, so we can assume a one-line header. 
- if (!is.null(.iea_file)) { - IEAData_withheader <- data.table::fread(file = .iea_file, header = TRUE, sep = ",") - } else { - IEAData_withheader <- data.table::fread(text = text, header = TRUE, sep = ",") - } - } else if (first_line %>% startsWith(expected_1st_line_start) & - second_line %>% startsWith(expected_2nd_line_start)) { - # We have the complicated start to the file, so go through some additional work to apply the proper header - # to the file. - if (second_line %>% endsWith(",")) { - # The file may have been opened in Excel and resaved. - # When that occurs, many commas are appended to the 2nd line. - # Strip out these commas before proceeding further. - # The pattern ,*$ means "match any number (*) of commas (,) at the end of the line ($)". - second_line <- sub(pattern = ",*$", replacement = "", second_line) + Map(conns, .iea_file, text, f = function(this_conn, this_iea_file, this_text) { + # Check if the first line has the simple format + first_two_lines <- this_conn |> + readLines(n = 2) + close(this_conn) + assertthat::assert_that(length(first_two_lines) == 2, msg = "couldn't read 2 lines in iea_df") + # first_line <- first_two_lines[[1]] + # second_line <- first_two_lines[[2]] + # Eliminate any quotes that are present + first_line <- gsub(pattern = '\\"', replacement = "", x = first_two_lines[[1]]) + second_line <- gsub(pattern = '\\"', replacement = "", x = first_two_lines[[2]]) + # Ensure that we have an expected format for the first line or two in first_two_lines. + assertthat::assert_that(first_line %>% startsWith(expected_simple_start) | + (first_line %>% startsWith(expected_1st_line_start) & second_line %>% startsWith(expected_2nd_line_start)), + msg = paste0(".iea_file must start with ", + "first line: '", expected_simple_start, "', ", + "or ", + "first line: '", expected_1st_line_start, "' and ", + "second line: '", expected_2nd_line_start, "'. 
", + "Instead, found ", + "first line: '", first_line, "', ", + "second line: '", second_line, "'.")) + if (first_line %>% startsWith(expected_simple_start)) { + # We have the simple start to the file, so we can assume a one-line header. + if (!is.null(this_iea_file)) { + IEAData_withheader <- data.table::fread(file = this_iea_file, header = TRUE, sep = ",") + } else { + IEAData_withheader <- data.table::fread(text = this_text, header = TRUE, sep = ",") + } + } else if (first_line %>% startsWith(expected_1st_line_start) & + second_line %>% startsWith(expected_2nd_line_start)) { + # We have the complicated start to the file, so go through some additional work to apply the proper header + # to the file. + if (second_line %>% endsWith(",")) { + # The file may have been opened in Excel and resaved. + # When that occurs, many commas are appended to the 2nd line. + # Strip out these commas before proceeding further. + # The pattern ,*$ means "match any number (*) of commas (,) at the end of the line ($)". + second_line <- sub(pattern = ",*$", replacement = "", second_line) + } + if (!is.null(this_iea_file)) { + # Slurp the file. This slurping ignores the header, which we know are the first 2 lines. + # Note that I'm using data.table::fread at the recommendation of + # https://statcompute.wordpress.com/2014/02/11/efficiency-of-importing-large-csv-files-in-r/ + # which indicates this function is significantly faster than other options. + IEAData_noheader <- data.table::fread(file = this_iea_file, header = FALSE, sep = ",", skip = 2, encoding = "Latin-1") + } else { + IEAData_noheader <- data.table::fread(text = this_text, header = FALSE, sep = ",", skip = 2, encoding = "Latin-1") + } + # At this point, the IEAData_noheader data frame has default (meaningless) column names, V1, V2, V3, ... + # Create column names from the header lines that we read previously. 
+ # The code here should be robust to adding more years through time, + # because it simply replaces the first 3 items of the first line + # with appropriate values from the 2nd line. + cnames <- gsub(pattern = expected_1st_line_start, replacement = expected_2nd_line_start, first_line) %>% + strsplit(",") %>% + unlist() + IEAData_withheader <- IEAData_noheader %>% + magrittr::set_names(cnames) } - if (!is.null(.iea_file)) { - # Slurp the file. This slurping ignores the header, which we know are the first 2 lines. - # Note that I'm using data.table::fread at the recommendation of - # https://statcompute.wordpress.com/2014/02/11/efficiency-of-importing-large-csv-files-in-r/ - # which indicates this function is significantly faster than other options. - IEAData_noheader <- data.table::fread(file = .iea_file, header = FALSE, sep = ",", skip = 2, encoding = "Latin-1") - } else { - IEAData_noheader <- data.table::fread(text = text, header = FALSE, sep = ",", skip = 2, encoding = "Latin-1") + # Convert the country column to pure ASCII, if desired. + if (ensure_ascii_countries) { + IEAData_withheader <- IEAData_withheader %>% + dplyr::mutate( + # This hint is from + # https://stackoverflow.com/questions/39148759/remove-accents-from-a-dataframe-column-in-r + # COUNTRY = stringi::stri_trans_general(COUNTRY,id = "Latin-ASCII") + # iconv is much faster than stringi. + # First, convert from latin1 to ascii. + "{country}" := iconv(.data[[country]], from = "latin1", to = "ASCII//TRANSLIT"), + # However, this results in "^o" for o with circumflex as in Côte d'Ivoire. + # So replace those strings with simple "o" + "{country}" := gsub(.data[[country]], pattern = "\\^o", replacement = "o") + ) } - # At this point, the IEAData_noheader data frame has default (meaningless) column names, V1, V2, V3, ... - # Create column names from the header lines that we read previously. 
- # The code here should be robust to adding more years through time, - # because it simply replaces the first 3 items of the first line - # with appropriate values from the 2nd line. - cnames <- gsub(pattern = expected_1st_line_start, replacement = expected_2nd_line_start, first_line) %>% - strsplit(",") %>% - unlist() - IEAData_withheader <- IEAData_noheader %>% - magrittr::set_names(cnames) - } - # Convert the country column to pure ASCII, if desired. - if (ensure_ascii_countries) { - IEAData_withheader <- IEAData_withheader %>% - dplyr::mutate( - # This hint is from - # https://stackoverflow.com/questions/39148759/remove-accents-from-a-dataframe-column-in-r - # COUNTRY = stringi::stri_trans_general(COUNTRY,id = "Latin-ASCII") - # iconv is much faster than stringi. - # First, convert from latin1 to ascii. - "{country}" := iconv(.data[[country]], from = "latin1", to = "ASCII//TRANSLIT"), - # However, this results in "^o" for o with circumflex as in Côte d'Ivoire. - # So replace those strings with simple "o" - "{country}" := gsub(.data[[country]], pattern = "\\^o", replacement = "o") - ) - } - return(IEAData_withheader) + return(IEAData_withheader) + }) |> + dplyr::bind_rows() } @@ -169,13 +186,17 @@ slurp_iea_to_raw_df <- function(.iea_file = NULL, #' Note that `.iea_file` is read internally with [data.table::fread()] *without* stripping white space. #' #' If `.slurped_iea_df` is supplied, arguments `.iea_file` or `text` are ignored. -#' If `.slurped_iea_df` is absent, +#' If `.slurped_iea_df` is `NULL` (the default), #' either `.iea_file` or `text` are required, and #' the helper function `slurp_iea_to_raw_df()` is called internally #' to load a raw data frame of data. - #' -#' @param .iea_file the path to the raw IEA data file for which quality assurance is desired +#' This function is vectorized over `.iea_file`. +#' +#' @param .iea_file The path to the raw IEA data file for which quality assurance is desired. 
+#' Can be a vector of file paths, in which case +#' each file is loaded sequentially and stacked together +#' with [dplyr::bind_rows()]. #' @param text a string containing text to be parsed as an IEA file. #' @param expected_1st_line_start the expected start of the first line of `iea_file`. Default is ",,TIME". #' @param expected_2nd_line_start the expected start of the second line of `iea_file`. Default is "COUNTRY,FLOW,PRODUCT". @@ -183,7 +204,7 @@ slurp_iea_to_raw_df <- function(.iea_file = NULL, #' Note that `expected_simple_start` is sometimes encountered in data supplied by the IEA. #' Furthermore, `expected_simple_start` could be the format of the file when somebody "helpfully" fiddles with #' the raw data from the IEA. -#' @param .slurped_iea_df a data frame created by `slurp_iea_to_raw_df()` +#' @param .slurped_iea_df a data frame created by [slurp_iea_to_raw_df()] #' @param country the name of the country column. Default is "COUNTRY". #' @param flow the name of the flow column. Default is "FLOW". #' @param product the name of the product column. Default is "PRODUCT". @@ -310,9 +331,14 @@ iea_file_OK <- function(.iea_file = NULL, #' To further prepare the data frame for use, call [augment_iea_df()], #' passing the output of this function to the `.iea_df` argument of [augment_iea_df()]. #' -#' @param .iea_file a string containing the path to a .csv file of extended energy balances from the IEA. -#' Default is the path to a sample IEA file provided in this package. -#' @param text a character string that can be parsed as IEA extended energy balances. +#' This function is vectorized over `.iea_file`. +#' +#' @param .iea_file A string containing the path to a .csv file of extended energy balances from the IEA. +#' Can be a vector of file paths, in which case +#' each file is loaded sequentially and stacked together +#' with [dplyr::bind_rows()]. +#' Default is the path to a sample IEA file provided in this package. 
+#' @param text A character string that can be parsed as IEA extended energy balances. #' (This argument is useful for testing.) #' @param expected_1st_line_start the expected start of the first line of `iea_file`. Default is ",,TIME". #' @param expected_2nd_line_start the expected start of the second line of `iea_file`. Default is "COUNTRY,FLOW,PRODUCT". @@ -614,7 +640,7 @@ remove_agg_regions <- function(.iea_df, #' so they are deleted. #' #' The third problem this function solves is that energy type and units are not specified in IEA data. -#' An `Energy.type` column is added with the value of `energy_type_val`. +#' An `EnergyType` column is added with the value of `energy_type_val`. #' (Default is `E`, for energy, as opposed to `X`, which would be exergy.) #' A `Unit` column is added with the value of `unit_val`. #' (Default is "TJ", although any string can be specified in `unit_val`.) @@ -656,40 +682,65 @@ remove_agg_regions <- function(.iea_df, #' Default is "TJ" for terajoule. #' @param supply The string that identifies supply ledger side. #' Default is `IEATools::iea_cols$ledger_side`. -#' @param consumption The string that identifies consumption `Ledger.side`. Default is "Consumption". -#' @param tpes The string that identifies total primary energy supply `Flow.aggregation.point`. Default is "Total primary energy supply". +#' @param consumption The string that identifies consumption `Ledger.side`. +#' Default is "Consumption". +#' @param tpes The string that identifies total primary energy supply `Flow.aggregation.point`. +#' Default is "Total primary energy supply". #' @param tpes_flows A vector of strings that give flows that are aggregated to `Total primary energy supply`. -#' @param tfc_compare A string that identifies the `TFC compare` flow aggregation point. Default is "TFC compare". +#' @param tfc_compare A string that identifies the `TFC compare` flow aggregation point. +#' Default is "TFC compare". 
#' @param tfc_compare_flows A vector of strings that give `Flow`s that are aggregated to `TFC compare`. -#' @param transfers = A string that identifies transfers in the flow column. Default is "Transfers". -#' @param statistical_differences A string that identifies statistical differences in flow column. Default is "Statistical differences". -#' @param losses The string that indicates losses in the `Flow` column. Default is "Losses". -#' @param transformation_processes The string that indicates transformation processes in the `Flow` column. Default is "Transformation processes". -#' @param tp_flows_suffix The suffix for transformation processes in the `Flow` column. Default is "(transf.)". -#' @param nstp_flows_suffix The suffix for non-specified transformation processes in the `Flow` column. Default is "(transformation)". -#' @param mapep The string that identifies main activity producer electricity plants in the `Flow` column. Default is "Main activity producer electricity plants". -#' @param eiou The string that identifies energy industry own use in the `Flow` column. Default is "Energy industry own use". -#' @param eiou_flows_suffix The suffix for energy industry own use in the `Flow` column. Default is "(energy)". -#' @param coal_mines The string that identifies coal mines in the `Flow` column. Default is "Coal mines". -#' @param non_specified The string that identifies non-specified flows in the `Flow` column. Default is "Non-specified". -#' @param tfc The string that identifies total final consumption in the `Flow` column. Default is "Total final consumption". +#' @param transfers A string that identifies transfers in the flow column. +#' Default is "Transfers". +#' @param statistical_differences A string that identifies statistical differences in flow column. +#' Default is "Statistical differences". +#' @param losses The string that indicates losses in the `Flow` column. +#' Default is "Losses".
+#' @param transformation_processes The string that indicates transformation processes in the `Flow` column. +#' Default is "Transformation processes". +#' @param tp_flows_suffix The suffix for transformation processes in the `Flow` column. +#' Default is "(transf.)". +#' @param nstp_flows_suffix The suffix for non-specified transformation processes in the `Flow` column. +#' Default is "(transformation)". +#' @param mapep The string that identifies main activity producer electricity plants in the `Flow` column. +#' Default is "Main activity producer electricity plants". +#' @param eiou The string that identifies energy industry own use in the `Flow` column. +#' Default is "Energy industry own use". +#' @param eiou_flows_suffix The suffix for energy industry own use in the `Flow` column. +#' Default is "(energy)". +#' @param coal_mines The string that identifies coal mines in the `Flow` column. +#' Default is "Coal mines". +#' @param non_specified The string that identifies non-specified flows in the `Flow` column. +#' Default is "Non-specified". +#' @param tfc The string that identifies total final consumption in the `Flow` column. +#' Default is "Total final consumption". #' @param tfc_flows A vector of strings that give total final consumption in the `Flow` column. -#' @param industry A string that names the industry `Flow.aggregation.point`. Default is "Industry". +#' @param industry A string that names the industry `Flow.aggregation.point`. +#' Default is "Industry". #' @param industry_flows A vector of strings representing `Flow`s to be aggregated in the `Industry` `Flow.aggregation.point`. -#' @param iron_and_steel A string that identifies the iron and steel industry. Default is "Iron and steel". -#' @param mining_and_quarrying A string that identifies the mining and quarrying industry. Default is "Mining and quarrying". -#' @param transport A string that names the transport `Flow.aggregation.point`. Default is "Transport". 
+#' @param iron_and_steel A string that identifies the iron and steel industry. +#' Default is "Iron and steel". +#' @param mining_and_quarrying A string that identifies the mining and quarrying industry. +#' Default is "Mining and quarrying". +#' @param transport A string that names the transport `Flow.aggregation.point`. +#' Default is "Transport". #' @param transport_flows A vector of strings representing `Flow`s to be aggregated in the `Transport` `Flow.aggregation.point`. -#' @param other A string that names the other `Flow.aggregation.point`. Default is "Other". +#' @param other A string that names the other `Flow.aggregation.point`. +#' Default is "Other". #' @param other_flows A vector of strings representing `Flow`s to be aggregated in the `Other` `Flow.aggregation.point`. -#' @param non_energy A string that names the non-energy `Flow.aggregation.point`. Default is "Non-energy use". +#' @param non_energy A string that names the non-energy `Flow.aggregation.point`. +#' Default is "Non-energy use". #' @param non_energy_flows A list of `Flow`s to be aggregated to the `Non-energy use` `Flow.aggregation.point`. #' @param memo_non_energy_flows A list of `Flow`s to be aggregated to "Memo: Non-energy use in industry". #' Default is `IEATools::memo_non_energy_flows`. -#' @param electricity_output A string that names the electricity output `Flow`. Default is "Electricity output (GWh)". -#' @param electricity_output_flows_prefix A string prefix for `Flow`s to be aggregated in electricity output. Default is "Electricity output (GWh)-". -#' @param heat_output A string that names the heat output `Flow`. Default is "Heat output". -#' @param heat_output_flows_prefix A string prefix for `Flow`s to be aggregated in heat output. Default is "Heat output-". +#' @param electricity_output A string that names the electricity output `Flow`. +#' Default is "Electricity output (GWh)". 
+#' @param electricity_output_flows_prefix A string prefix for `Flow`s to be aggregated in electricity output. +#' Default is "Electricity output (GWh)-". +#' @param heat_output A string that names the heat output `Flow`. +#' Default is "Heat output". +#' @param heat_output_flows_prefix A string prefix for `Flow`s to be aggregated in heat output. +#' Default is "Heat output-". #' @param .rownum The name of a column created (and destroyed) internally by this function. #' The `.rownum` column temporarily holds row numbers for internal calculations. #' The `.rownum` column is deleted before returning. @@ -1118,7 +1169,9 @@ specify_non_energy_use <- function(.iea_df, # Add the replacement rows. dplyr::bind_rows(to_add) |> # Finally, pivot wider to return. - tidyr::pivot_wider(values_from = .values, names_from = dplyr::all_of(year), values_fill = 0) + tidyr::pivot_wider(values_from = dplyr::all_of(.values), + names_from = dplyr::all_of(year), + values_fill = 0) } @@ -1225,7 +1278,9 @@ tidy_iea_df <- function(.iea_df, #' Each bundled function is called in turn using default arguments. #' See examples for two ways to achieve the same result. #' -#' @param .iea_file The path of the file to be loaded. Default loads example data bundled with the package via [sample_iea_data_path()]. +#' @param .iea_file The path of the file to be loaded. +#' Can be a vector of files to be loaded. +#' Default loads example data bundled with the package via [sample_iea_data_path()]. #' @param unit_val The units for this file. #' Default is "TJ". #' @param remove_zeroes A logical indicating whether data points with the value `0` are to be removed from the output. diff --git a/R/psut.R b/R/psut.R index dec05fb7..43181d1b 100644 --- a/R/psut.R +++ b/R/psut.R @@ -4,7 +4,7 @@ #' `1`s where a product is expressed in the unit and `0`s otherwise. 
#' #' `.tidy_iea_df` should be grouped as needed, typically on -#' `Country`, `Year`, `Energy.type`, `Last.stage`, etc., but +#' `Country`, `Year`, `EnergyType`, `LastStage`, etc., but #' _not_ on `Unit`, `Flow` or `Product`. #' `.tidy_iea_df` is typically obtained from `tidy_iea_df()`. #' @@ -600,14 +600,14 @@ replace_null_RUV <- function(.sutmats = NULL, #' collapse_to_tidy_psut() %>% #' spread(key = matnames, value = matvals) %>% #' replace_null_RUV() %>% -#' full_join(S_units, by = c("Method", "Energy.type", "Last.stage", +#' full_join(S_units, by = c("Method", "EnergyType", "LastStage", #' "Country", "Year")) %>% #' gather(key = matnames, value = matvals, R, U_EIOU, U_feed, #' V, Y, S_units) %>% #' rename(matval_complicated = matvals) #' # Simple and Complicated are same. -#' full_join(Simple, Complicated, by = c("Method", "Energy.type", -#' "Last.stage", "Country", +#' full_join(Simple, Complicated, by = c("Method", "EnergyType", +#' "LastStage", "Country", #' "Year", "matnames")) %>% #' dplyr::mutate( #' same = matsbyname::equal_byname(matval_simple, matval_complicated) diff --git a/R/specify.R b/R/specify.R index 5b28770d..4e223e03 100644 --- a/R/specify.R +++ b/R/specify.R @@ -40,7 +40,7 @@ #' @param resources A string identifying resource industries to be added to `.tidy_iea_df`. #' Default is "`Resources`". #' @param production A string identifying production in the flow column. Default is "`Production`". -#' @param e_dot The name of the energy column in `.tidy_iea_df`. Default is "`E.dot`". +#' @param e_dot The name of the energy column in `.tidy_iea_df`. Default is "`Edot`". #' @param list_primary_coal_products The list of primary coal products for which the production industry needs to be changed. #' Default is `IEATools::primary_coal_products`. #' @param list_primary_oil_products The list of primary oil products for which the production industry needs to be changed.
@@ -77,13 +77,13 @@ #' specify_primary_production() %>% #' add_psut_matnames() %>% #' dplyr::filter(Flow == "Coal mines" | stringr::str_detect(Flow, "Resources")) %>% -#' select(-Method, -Last.stage, -Ledger.side, -Unit) +#' select(-Method, -LastStage, -LedgerSide, -Unit) #' # EIOU by "Liquefaction (LNG) / regasification plants" is reassigned to "Oil and gas extraction" #' data.frame( -#' Flow.aggregation.point = c("Energy industry own use"), +#' FlowAggregationPoint = c("Energy industry own use"), #' Flow = c("Liquefaction (LNG) / regasification plants"), #' Product = c("Natural gas"), -#' E.dot = c(-42), +#' Edot = c(-42), #' stringsAsFactors = FALSE #' ) %>% #' specify_primary_production() @@ -100,7 +100,7 @@ specify_primary_production <- function(.tidy_iea_df, coal_mines = IEATools::industry_flows$coal_mines, oil_extraction = IEATools::industry_flows$oil_extraction, gas_extraction = IEATools::industry_flows$natural_gas_extraction, - liquefaction_regas = "Liquefaction (LNG) / regasification plants", + liquefaction_regas = IEATools::eiou_flows$liquefaction_regasification_plants, liquefaction_regas_reassign = IEATools::industry_flows$natural_gas_extraction, transformation_processes = IEATools::aggregation_flows$transformation_processes, resources = IEATools::tpes_flows$resources, @@ -359,6 +359,16 @@ specify_interface_industries <- function(.tidy_iea_df, #' Default is TRUE. #' @param route_non_specified_tp Boolean stating whether non-specified transformation processes flows should be routed to existing industries. #' Default is TRUE. +#' @param specify_renewable_plants Boolean stating whether renewable energy industries should be specified or not. +#' Default is FALSE. +#' @param specify_electricity_grid Boolean stating whether an electricity grid industry should be specified or not. +#' Default is FALSE. +#' @param specify_distribution_industries Boolean stating whether distribution industries should be specified or not. +#' Default is FALSE. 
+#' @param ascribe_eiou_to_renewable_plants A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new renewable industries. Default is FALSE. +#' @param ascribe_eiou_to_nuclear A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new nuclear industry. Default is FALSE. #' @param flow_aggregation_point The name of the flow aggregation point column in `.tidy_iea_df`. Default is "Flow.aggregation.point". #' @param eiou A string identifying energy industry own use in the flow aggregation point column. Default is "Energy industry own use". #' @param transformation_processes A string identifying transformation processes in the flow aggregation point column. Default is "Transformation processes". @@ -366,7 +376,7 @@ specify_interface_industries <- function(.tidy_iea_df, #' @param own_use_elect_chp_heat A string identifying own use in electricity, CHP and heat plants in the flow column. Default is "Own use in electricity, CHP and heat plants". #' @param pumped_storage A string identifying pumped storage plants in the flow column. Default is "Pumped storage plants". #' @param nuclear_industry A string identifying nuclear plants in the flow column. Default is "Nuclear industry". -#' @param e_dot The name of the energy flow column in `.tidy_iea_df`. Default is "E.dot". +#' @param e_dot The name of the energy flow column in `.tidy_iea_df`. Default is "Edot". #' @param negzeropos The name of a temporary column created in `.tidy_iea_df`. Default is ".negzeropos". #' @param main_act_producer_elect A string identifying main activity producer electricity plants. Default is "Main activity producer electricity plants". 
#' @@ -378,41 +388,60 @@ specify_interface_industries <- function(.tidy_iea_df, #' library(dplyr) #' load_tidy_iea_df() %>% #' specify_tp_eiou() %>% -#' filter(Flow.aggregation.point == "Energy industry own use" & +#' filter(FlowAggregationPoint == "Energy industry own use" & #' Flow == "Main activity producer electricity plants") specify_tp_eiou <- function(.tidy_iea_df, split_own_use_elect_chp_heat_using_shares_of = c("input", "output"), route_non_specified_eiou = TRUE, route_non_specified_tp = TRUE, - flow_aggregation_point = "Flow.aggregation.point", - eiou = "Energy industry own use", - transformation_processes = "Transformation processes", - flow = "Flow", + specify_renewable_plants = FALSE, + specify_electricity_grid = FALSE, + specify_distribution_industries = FALSE, + ascribe_eiou_to_renewable_plants = FALSE, + ascribe_eiou_to_nuclear = FALSE, + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + eiou = IEATools::tfc_compare_flows$energy_industry_own_use, + transformation_processes = IEATools::tfc_compare_flows$transformation_processes, + flow = IEATools::iea_cols$flow, # Industries that receive EIOU but are not in Transformation processes - own_use_elect_chp_heat = "Own use in electricity, CHP and heat plants", - pumped_storage = "Pumped storage plants", - nuclear_industry = "Nuclear industry", - e_dot = "E.dot", + own_use_elect_chp_heat = IEATools::eiou_flows$own_use_elect_chp_heat_plants, + pumped_storage = IEATools::eiou_flows$pumped_storage_plants, + nuclear_industry = IEATools::eiou_flows$nuclear_industry, + e_dot = IEATools::iea_cols$e_dot, negzeropos = ".negzeropos", - # Places where the EIOU will e reassigned - main_act_producer_elect = "Main activity producer electricity plants"){ - .tidy_iea_df %>% + # Places where the EIOU will be reassigned + main_act_producer_elect = IEATools::eiou_flows$main_activity_producer_electricity_plants){ + .tidy_iea_df |> matsindf::verify_cols_missing(negzeropos) 
split_own_use_elect_chp_heat_using_shares_of <- match.arg(split_own_use_elect_chp_heat_using_shares_of) - .tidy_iea_df %>% + .tidy_iea_df |> gather_producer_autoproducer() %>% - route_pumped_storage() %>% + route_pumped_storage( + specify_renewable_plants = specify_renewable_plants + ) |> split_oil_gas_extraction_eiou() %>% + add_nuclear_industry( + ascribe_eiou_to_nuclear = ascribe_eiou_to_nuclear + ) %>% + specify_renewable_plants( + specify_renewable_plants = specify_renewable_plants, + ascribe_eiou_to_renewable_plants = ascribe_eiou_to_renewable_plants + ) |> route_own_use_elect_chp_heat( split_using_shares_of = split_own_use_elect_chp_heat_using_shares_of - ) %>% - add_nuclear_industry() %>% + ) |> route_non_specified_flows( route_non_specified_eiou = route_non_specified_eiou, route_non_specified_tp = route_non_specified_tp - ) + ) |> + specify_electricity_grid( + specify_electricity_grid = specify_electricity_grid + ) |> + specify_distribution_losses( + specify_distribution_industries = specify_distribution_industries + ) } @@ -441,9 +470,9 @@ specify_tp_eiou <- function(.tidy_iea_df, #' #' Transformation sinks and sources are identified by the following algorithm: #' -#' 1. Identify (per group in `.tidy_iea_df`) all `Transformation processes` that consume energy (negative value for `E.dot`). +#' 1. Identify (per group in `.tidy_iea_df`) all `Transformation processes` that consume energy (negative value for `Edot`). #' Energy consumption can be for the transformation process itself or for Energy industry own use. -#' 2. Identify (per group in `.tidy_iea_df`) all `Transformation processes` that produce energy (positive value for `E.dot`). +#' 2. Identify (per group in `.tidy_iea_df`) all `Transformation processes` that produce energy (positive value for `Edot`). #' 3. Take the set difference between the two (consumers less producers for sinks and producers less consumers for sources). 
#' The set difference is the list of transformation sinks or sources, respectively. #' @@ -451,10 +480,10 @@ specify_tp_eiou <- function(.tidy_iea_df, #' it returns a summary containing grouping variables and industries that are transformation sinks or sources. #' So be sure to specify (or accept defaults for) #' the `grouping_vars` argument. -#' Typical grouping variables are `Method`, `Last.stage`, `Country`, `Year`, `Energy.type`. +#' Typical grouping variables are `Method`, `LastStage`, `Country`, `Year`, `EnergyType`. #' Don't group on `Flow.aggregation.point`, because energy from different aggregation points #' (`Energy industry own use` and `Transformation processes`) flows into each machine. -#' Don't group on `Flow`, `Product`, or `E.dot`, either. +#' Don't group on `Flow`, `Product`, or `Edot`, either. #' If groups are not set, #' `flow`s will be analyzed together, possibly leading to missed transformation sinks or sources. #' @@ -476,7 +505,7 @@ specify_tp_eiou <- function(.tidy_iea_df, #' @param eiou a string that identifies energy industry own use in the `flow_aggregation_point` column. Default is "`Energy industry own use`". #' @param flow the name of the flow column in `.tidy_iea_df`. Default is "`Flow`". #' @param product the name of the product column in `.tidy_iea_df`. Default is "`Product`". -#' @param e_dot the name of the energy rate column in `.tidy_iea_df`. Default is "`E.dot`". +#' @param e_dot the name of the energy rate column in `.tidy_iea_df`. Default is "`Edot`". #' #' @return The `grouping_vars` and the `flow` column, #' with one row for each industry that is a transformation sink or source.
@@ -498,12 +527,12 @@ specify_tp_eiou <- function(.tidy_iea_df, #' tp_sinks_sources(type = "sources") tp_sinks_sources <- function(.tidy_iea_df, type = c("sinks", "sources"), - flow_aggregation_point = "Flow.aggregation.point", - transformation_processes = "Transformation processes", - eiou = "Energy industry own use", - flow = "Flow", - product = "Product", - e_dot = "E.dot"){ + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + transformation_processes = IEATools::tfc_compare_flows$transformation_processes, + eiou = IEATools::tfc_compare_flows$energy_industry_own_use, + flow = IEATools::iea_cols$flow, + product = IEATools::iea_cols$product, + e_dot = IEATools::iea_cols$e_dot){ type <- match.arg(type) grouping_vars <- matsindf::everything_except(.tidy_iea_df, flow_aggregation_point, flow, product, e_dot) use_rows <- .tidy_iea_df %>% @@ -547,7 +576,7 @@ tp_sinks_sources <- function(.tidy_iea_df, #' @param flow the name of the flow column in `.tidy_iea_df`. Default is "`Flow`". #' @param non_energy_flow a sting identifying non-energy flows. Default is "`Non-energy use industry/transformation/energy`". #' @param product the name of the product column in `.tidy_iea_df`. Default is "`Product`". -#' @param e_dot the name of the energy rate column in `.tidy_iea_df`. Default is "`E.dot`". +#' @param e_dot the name of the energy rate column in `.tidy_iea_df`. Default is "`Edot`". 
#' #' @return `.tidy_iea_df` with energy sunk in Transformation processes sinks reassigned to Non-energy use #' @@ -556,21 +585,21 @@ tp_sinks_sources <- function(.tidy_iea_df, #' @examples #' library(dplyr) #' DF <- data.frame( -#' Ledger.side = c("Supply", "Supply", "Supply", "Consumption"), -#' Flow.aggregation.point = c("Transformation processes", +#' LedgerSide = c("Supply", "Supply", "Supply", "Consumption"), +#' FlowAggregationPoint = c("Transformation processes", #' "Transformation processes", #' "Transformation processes", #' "Non-energy use"), #' Flow = c("Automobiles", "Automobiles", "Furnaces", #' "Non-energy use industry/transformation/energy"), #' Product = c("Petrol", "MD", "Coal", "Coal"), -#' E.dot = c(-1, 1, -2, 8), +#' Edot = c(-1, 1, -2, 8), #' stringsAsFactors = FALSE #' ) %>% #' mutate( #' Method = "PCM", -#' Last.stage = "Final", -#' Energy.type = "E", +#' LastStage = "Final", +#' EnergyType = "E", #' Country = "Bogus", #' Year = 1971 #' ) @@ -578,17 +607,16 @@ tp_sinks_sources <- function(.tidy_iea_df, #' DF %>% #' tp_sinks_to_nonenergy() tp_sinks_to_nonenergy <- function(.tidy_iea_df, - ledger_side = "Ledger.side", - consumption = "Consumption", - flow_aggregation_point = "Flow.aggregation.point", - non_energy_flow_agg_point = "Non-energy use", - transformation_processes = "Transformation processes", - eiou = "Energy industry own use", - flow = "Flow", - non_energy_flow = "Non-energy use industry/transformation/energy", - product = "Product", - e_dot = "E.dot"){ - # grouping_vars = c("Method", "Last.stage", "Country", "Year", "Energy.type")){ + ledger_side = IEATools::iea_cols$ledger_side, + consumption = IEATools::ledger_sides$consumption, + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + non_energy_flow_agg_point = IEATools::tfc_flows$non_energy_use, + transformation_processes = IEATools::tfc_compare_flows$transformation_processes, + eiou = IEATools::tfc_compare_flows$energy_industry_own_use, + flow = 
IEATools::iea_cols$flow, + non_energy_flow = IEATools::non_energy_flows$non_energy_use_industry_transformation_energy, + product = IEATools::iea_cols$product, + e_dot = IEATools::iea_cols$e_dot){ # First step is to find all Transformation process sinks. # These items need to removed from the IEAData data frame, eventually. Sinks <- .tidy_iea_df %>% @@ -628,7 +656,7 @@ tp_sinks_to_nonenergy <- function(.tidy_iea_df, # This has the effect of adding the new Non-energy use to any existing non-energy use # in the same group. SummarizedNonenergy <- Nonenergy %>% - # Group by all columns except for E.dot + # Group by all columns except for Edot matsindf::group_by_everything_except(e_dot) %>% dplyr::summarise(!!as.name(e_dot) := sum(!!as.name(e_dot))) %>% dplyr::ungroup() @@ -661,6 +689,16 @@ tp_sinks_to_nonenergy <- function(.tidy_iea_df, #' Default is TRUE. #' @param route_non_specified_tp Boolean stating whether non-specified transformation processes flows should be routed to existing industries #' Default is TRUE. +#' @param specify_renewable_plants A boolean indicating whether renewable energy plants should be specified or not. +#' Default is FALSE. +#' @param specify_electricity_grid Boolean stating whether an electricity grid industry should be specified or not. +#' Default is FALSE. +#' @param specify_distribution_industries Boolean stating whether distribution industries should be specified or not. +#' Default is FALSE. +#' @param ascribe_eiou_to_renewable_plants A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new renewable industries. Default is FALSE. +#' @param ascribe_eiou_to_nuclear A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new nuclear industry. Default is FALSE. #' #' @return An enhanced and corrected version of `.tidy_iea_df` #' That is ready for physical supply-use table (PSUT) analysis. 
@@ -681,7 +719,12 @@ tp_sinks_to_nonenergy <- function(.tidy_iea_df, specify_all <- function(.tidy_iea_df, split_own_use_elect_chp_heat_using_shares_of = c("input", "output"), route_non_specified_eiou = TRUE, - route_non_specified_tp = TRUE){ + route_non_specified_tp = TRUE, + specify_renewable_plants = FALSE, + specify_electricity_grid = FALSE, + specify_distribution_industries = FALSE, + ascribe_eiou_to_renewable_plants = FALSE, + ascribe_eiou_to_nuclear = FALSE){ split_own_use_elect_chp_heat_using_shares_of <- match.arg(split_own_use_elect_chp_heat_using_shares_of) @@ -691,7 +734,12 @@ specify_all <- function(.tidy_iea_df, specify_tp_eiou( split_own_use_elect_chp_heat_using_shares_of = split_own_use_elect_chp_heat_using_shares_of, route_non_specified_eiou = route_non_specified_eiou, - route_non_specified_tp = route_non_specified_tp + route_non_specified_tp = route_non_specified_tp, + specify_renewable_plants = specify_renewable_plants, + specify_electricity_grid = specify_electricity_grid, + specify_distribution_industries = specify_distribution_industries, + ascribe_eiou_to_renewable_plants = ascribe_eiou_to_renewable_plants, + ascribe_eiou_to_nuclear = ascribe_eiou_to_nuclear, ) %>% specify_bunkers() %>% specify_interface_industries() %>% @@ -731,7 +779,7 @@ specify_all <- function(.tidy_iea_df, #' load_tidy_iea_df() %>% #' specify_all() %>% #' despecify_col(col = "Flow", despecified_col = "clean_Flow") %>% -#' select(Flow, Product, E.dot, clean_Flow) %>% +#' select(Flow, Product, Edot, clean_Flow) %>% #' filter(endsWith(Flow, RCLabels::bracket_notation[["suff_end"]])) despecify_col <- function(.df, col, despecified_col, notations = list(RCLabels::of_notation, RCLabels::from_notation), @@ -778,7 +826,7 @@ despecify_col <- function(.df, col, despecified_col, #' load_tidy_iea_df() %>% #' specify_all() %>% #' remove_suffix_specifications(col = "Flow", unsuffixed_col = "clean_Flow") %>% -#' select(Flow, Product, E.dot, clean_Flow) %>% +#' select(Flow, Product, 
Edot, clean_Flow) %>% #' filter(endsWith(Flow, RCLabels::bracket_notation[["suff_end"]])) remove_suffix_specifications <- function(.df, col, unsuffixed_col, notations = list(RCLabels::of_notation, RCLabels::from_notation)){ diff --git a/R/specify_tp_eiou.R b/R/specify_tp_eiou.R index f89fe0d5..cdb1b6b4 100644 --- a/R/specify_tp_eiou.R +++ b/R/specify_tp_eiou.R @@ -81,7 +81,7 @@ gather_producer_autoproducer <- function(.tidy_iea_df, ) ) %>% # Now sum similar rows using summarise. - # Group by everything except the energy flow rate column, "E.dot". + # Group by everything except the energy flow rate column, "Edot". matsindf::group_by_everything_except(e_dot) %>% dplyr::summarise( "{e_dot}" := sum(.data[[e_dot]]) @@ -116,8 +116,11 @@ gather_producer_autoproducer <- function(.tidy_iea_df, #' This function is called within the `specify_all()` function. #' #' @param .tidy_iea_df The `.tidy_iea_df` which flows need to be specified. +#' @param specify_renewable_plants A boolean indicating whether renewable energy plants should be specified or not. +#' Default is FALSE. #' @param flow_aggregation_point The name of the flow aggregation point column in the `.tidy_iea_df`. #' Default is `IEATools::iea_cols$flow_aggregation_point`. +#' @param country,method,energy_type,last_stage,unit,year,product See `IEATools::iea_cols`. #' @param flow The name of the flow column in the `.tidy_iea_df`. #' Default is `IEATools::iea_cols$flow`. #' @param e_dot The name of the energy column in the `.tidy_iea_df`. @@ -128,6 +131,10 @@ gather_producer_autoproducer <- function(.tidy_iea_df, #' Default is `IEATools::eiou_flows$pumped_storage_plants`. #' @param main_act_producer_elect A string identifying "Main activity producer electricity plants" in the `flow` column of the `.tidy_iea_df`. #' Default is `IEATools::main_act_plants$main_act_prod_elect_plants`. +#' @param hydro_plants The name of the newly created hydropower industry. +#' Default is `IEATools::renewable_industries$hydro_plants`. 
+#' @param hydro The name of the "Hydro" product. +#' Default is `IEATools::renewable_products$hydro`. #' @param negzeropos The name of a temporary column created in `.tidy_iea_df`. #' Default is ".negzeropos". #' @@ -140,31 +147,116 @@ gather_producer_autoproducer <- function(.tidy_iea_df, #' load_tidy_iea_df() %>% #' gather_producer_autoproducer() route_pumped_storage <- function(.tidy_iea_df, + specify_renewable_plants = FALSE, # Column names flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + country = IEATools::iea_cols$country, + method = IEATools::iea_cols$method, + energy_type = IEATools::iea_cols$energy_type, + year = IEATools::iea_cols$year, + last_stage = IEATools::iea_cols$last_stage, + unit = IEATools::iea_cols$unit, flow = IEATools::iea_cols$flow, e_dot = IEATools::iea_cols$e_dot, + product = IEATools::iea_cols$product, # Flow and flow aggregation point names eiou = IEATools::aggregation_flows$energy_industry_own_use, pumped_storage = IEATools::eiou_flows$pumped_storage_plants, main_act_producer_elect = IEATools::main_act_plants$main_act_prod_elect_plants, + hydro_plants = IEATools::renewable_industries$hydro_plants, + hydro = IEATools::renewable_products$hydro, # Temporary column name negzeropos = ".negzeropos"){ - .tidy_iea_df %>% - dplyr::mutate( - "{flow}" := dplyr::case_when( - (.data[[flow]] == pumped_storage & .data[[flow_aggregation_point]] == eiou) ~ main_act_producer_elect, - TRUE ~ .data[[flow]] + if (isFALSE(specify_renewable_plants)){ + routed_phs <- .tidy_iea_df %>% + dplyr::mutate( + "{flow}" := dplyr::case_when( + (.data[[flow]] == pumped_storage & .data[[flow_aggregation_point]] == eiou) ~ main_act_producer_elect, + TRUE ~ .data[[flow]] + ) + ) %>% + # Aggregating. We need to add a pos/neg/null column to add up differently positive and negative values, otherwise we'd only get NET flows. 
+ # dplyr::mutate( + # "{negzeropos}" := dplyr::case_when( + # .data[[e_dot]] < 0 ~ "neg", + # .data[[e_dot]] == 0 ~ "zero", + # .data[[e_dot]] > 0 ~ "pos" + # ) + # ) %>% + # Now sum similar rows using summarise. + # Group by everything except the energy flow rate column, "Edot". + matsindf::group_by_everything_except(e_dot) %>% + dplyr::summarise( + "{e_dot}" := sum(.data[[e_dot]]) + ) %>% + dplyr::mutate( + # Eliminate the column we added. + "{negzeropos}" := NULL + ) %>% + dplyr::ungroup() + + } else { + + # Listing observations for which pumped hydro EIOU should be routed to the new "Hydro" industry + hydro_observations <- .tidy_iea_df |> + dplyr::filter(.data[[product]] == hydro) |> + tidyr::expand(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]], .data[[unit]]) + + # Routing pumped hydro to the "Hydro" industry for these observations + routed_to_hydropower <- .tidy_iea_df |> + dplyr::inner_join(hydro_observations, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + dplyr::mutate( + "{flow}" := dplyr::case_when( + (.data[[flow]] == pumped_storage & .data[[flow_aggregation_point]] == eiou) ~ hydro_plants, + TRUE ~ .data[[flow]] + ) ) - ) %>% - # Now sum similar rows using summarise. - # Group by everything except the energy flow rate column, "E.dot". - matsindf::group_by_everything_except(e_dot) %>% - dplyr::summarise( - "{e_dot}" := sum(.data[[e_dot]]) - ) %>% - dplyr::ungroup() +# <<<<<<< HEAD +# ) %>% +# # Now sum similar rows using summarise. +# # Group by everything except the energy flow rate column, "Edot". 
+# matsindf::group_by_everything_except(e_dot) %>% +# dplyr::summarise( +# "{e_dot}" := sum(.data[[e_dot]]) +# ) %>% +# dplyr::ungroup() +# ======= + + # Routing pumped hydro to Main activity producer electricity plants for remaining observations + routed_to_elec_plants <- .tidy_iea_df |> + dplyr::anti_join(hydro_observations, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + dplyr::mutate( + "{flow}" := dplyr::case_when( + (.data[[flow]] == pumped_storage & .data[[flow_aggregation_point]] == eiou) ~ main_act_producer_elect, + TRUE ~ .data[[flow]] + ) + ) + + # Binding both data frames and clearing up + routed_phs <- routed_to_hydropower |> + dplyr::bind_rows(routed_to_elec_plants) |> + # dplyr::mutate( + # "{negzeropos}" := dplyr::case_when( + # .data[[e_dot]] < 0 ~ "neg", + # .data[[e_dot]] == 0 ~ "zero", + # .data[[e_dot]] > 0 ~ "pos" + # ) + # ) %>% + # Now sum similar rows using summarise. + # Group by everything except the energy flow rate column, "Edot". + matsindf::group_by_everything_except(e_dot) %>% + dplyr::summarise( + "{e_dot}" := sum(.data[[e_dot]]) + ) %>% + dplyr::mutate( + # Eliminate the column we added. + "{negzeropos}" := NULL + ) %>% + dplyr::ungroup() + } + return(routed_phs) +# >>>>>>> develop } @@ -397,12 +489,12 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, split_using_shares_of <- match.arg(split_using_shares_of) # The function check whether one of the three main activity elect, heat, and/or chp exists in the TP - supply, - # for each (Country, Method, Energy.type, Last.stage, Year) + # for each (Country, Method, EnergyType, LastStage, Year) # If not, then it routes "Own use in electricity, CHP and heat plants" to "Main activity producer electricity plants". # If one of the three main activities elect, heat, and/or CHP EXISTS as a supplying transformation process, # Then it ascribes ... 
- # Returns all the combinations of (Country, Method, Energy.type, Last.stage, Year) present in the .tidy_iea_df + # Returns all the combinations of (Country, Method, EnergyType, LastStage, Year) present in the .tidy_iea_df df_observations_included_tidy_iea_df <- .tidy_iea_df %>% dplyr::group_by(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]]) %>% dplyr::summarise( @@ -439,7 +531,7 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, ) } - # Find out which observations (Country, Method, Energy.type, Last.stage, Year) are NOT in the total computed + # Find out which observations (Country, Method, EnergyType, LastStage, Year) are NOT in the total computed list_not_included_total_main_activity <- df_observations_included_tidy_iea_df %>% dplyr::anti_join(total_main_activity, by = c({country}, {method}, {energy_type}, {last_stage}, {year})) %>% # tidyr::unite(col = "ID", .data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]]) %>% @@ -557,7 +649,7 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, # Now sum similar rows using summarise. - # Group by everything except the energy flow rate column, "E.dot". + # Group by everything except the energy flow rate column, "Edot". matsindf::group_by_everything_except(e_dot) %>% dplyr::summarise( "{e_dot}" := sum(.data[[e_dot]]) @@ -601,6 +693,8 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, #' * the output ascribed to nuclear plants is subtracted from Main activity producer electricity and heat plants. #' #' @param .tidy_iea_df The `.tidy_iea_df` which flows need to be specified. +#' @param ascribe_eiou_to_nuclear A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new nuclear industry. Default is FALSE. #' @param flow_aggregation_point The name of the flow aggregation point column in the `.tidy_iea_df`. #' Default is `IEATools::iea_cols$flow_aggregation_point`. 
#' @param flow The name of the flow column in the `.tidy_iea_df`. @@ -637,20 +731,20 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, #' Default is `IEATools::main_act_plants$autoprod_elect_plants`. #' @param autoproducer_chp A string identifying "Autoproducer CHP plants" in the `flow` column of the `.tidy_iea_df`. #' Default is `IEATools::transformation_processes$autoproducer_CHP_plants`. +#' @param own_use_elect_chp_heat A string identifying "Own use in electricity, CHP and heat plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::eiou_flows$own_use_elect_chp_heat_plants`. #' @param nuclear A string identifying the "Nuclear" product in the `product` column of the `tidy_iea_df`. #' Default is "Nuclear". #' @param electricity A string identifying the "Electricity" product in the `product` column of the `tidy_iea_df`. -#' Default is "Electricity". +#' Default is `IEATools::electricity_products$electricity`. #' @param heat A string identifying the "Heat" product in the `product` column of the `tidy_iea_df`. -#' Default is "Heat". +#' Default is `IEATools::heat_products$heat`. #' @param negzeropos The name of a temporary column added to the data frame. #' Default is ".negzeropos". -#' @param share_elect_output_From_Func A temporary column added to the data frame. +#' @param share_elect_output_From_Func The name of a temporary column added to the data frame. #' Default is ".share_elect_output_From_Func". -#' @param Electricity_Nuclear A temporary column and product name added to the data frame, which identifies the production of electricity by nuclear plants. -#' Default is "Electricity_Nuclear". -#' @param Heat_Nuclear A temporary column and product name added to the data frame, which identifies the production of heat by nuclear plants. -#' Default is "Heat_Nuclear". +#' @param share_nuclear_output The name of a temporary column added to the data frame. +#' Default is ".share_nuclear_output". 
#' @param ratio_output_to_nuclear_fuel A parameter that describes the correspondance between input of nuclear fuel and output of electricity and/or heat. #' The IEA World Energy Extended Balances state that the value adopted in the balances is 0.33, which is therefore #' the default value of the parameter. @@ -663,6 +757,7 @@ route_own_use_elect_chp_heat <- function(.tidy_iea_df, #' load_tidy_iea_df() %>% #' add_nuclear_industry() add_nuclear_industry <- function(.tidy_iea_df, + ascribe_eiou_to_nuclear = FALSE, # Column names flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, flow = IEATools::iea_cols$flow, @@ -683,14 +778,14 @@ add_nuclear_industry <- function(.tidy_iea_df, main_act_producer_chp = IEATools::main_act_plants$main_act_prod_chp_plants, autoproducer_elect = IEATools::main_act_plants$autoprod_elect_plants, autoproducer_chp = IEATools::transformation_processes$autoproducer_CHP_plants, - nuclear = "Nuclear", - electricity = "Electricity", - heat = "Heat", + own_use_elect_chp_heat = IEATools::eiou_flows$own_use_elect_chp_heat_plants, + nuclear = IEATools::nuclear_products$nuclear, + electricity = IEATools::electricity_products$electricity, + heat = IEATools::heat_products$heat, # Strings identifying temporary column names negzeropos = ".negzeropos", share_elect_output_From_Func = ".share_elect_output_From_Func", - Electricity_Nuclear = "Electricity_Nuclear", - Heat_Nuclear = "Heat_Nuclear", + share_nuclear_output = ".share_nuclear_output", # Constant ratio_output_to_nuclear_fuel = 0.33){ @@ -708,12 +803,14 @@ add_nuclear_industry <- function(.tidy_iea_df, # tidyr::pivot_wider(names_from = .data[[product]], values_from = .data[[e_dot]]) %>% tidyr::pivot_wider(names_from = dplyr::all_of(product), values_from = dplyr::all_of(e_dot)) %>% # dplyr::select(-tidyselect::any_of({e_dot})) - dplyr::select(-tidyselect::any_of(e_dot)) - + dplyr::select(-tidyselect::any_of(e_dot)) + # Select names of wide data frame just built, so we can add missing 
products as additional columns names_intermediary_modified_flows <- names(intermediary_modified_flows) - modified_flows <- intermediary_modified_flows %>% + # Modify selected flows + # (a) Temporary df to help specifying EIOU flows after + temp <- intermediary_modified_flows %>% tibble::add_column(!!products_tibble[! names(products_tibble) %in% names_intermediary_modified_flows]) %>% dplyr::mutate( "{nuclear}" := tidyr::replace_na(.data[[nuclear]], 0), @@ -724,12 +821,14 @@ add_nuclear_industry <- function(.tidy_iea_df, "{share_elect_output_From_Func}" := .data[[electricity]] / (.data[[electricity]] + .data[[heat]]), "{electricity}" := .data[[electricity]] + (.data[[nuclear]] * ratio_output_to_nuclear_fuel) * .data[[share_elect_output_From_Func]], "{heat}" := .data[[heat]] + (.data[[nuclear]] * ratio_output_to_nuclear_fuel) * (1 - .data[[share_elect_output_From_Func]]), - "{Electricity_Nuclear}" := - .data[[nuclear]] * ratio_output_to_nuclear_fuel * .data[[share_elect_output_From_Func]], - "{Heat_Nuclear}" := - .data[[nuclear]] * ratio_output_to_nuclear_fuel * (1 - .data[[share_elect_output_From_Func]]) - ) %>% - # dplyr::select(-.data[[share_elect_output_From_Func]]) %>% + "{electricity}_{nuclear}" := - .data[[nuclear]] * ratio_output_to_nuclear_fuel * .data[[share_elect_output_From_Func]], + "{heat}_{nuclear}" := - .data[[nuclear]] * ratio_output_to_nuclear_fuel * (1 - .data[[share_elect_output_From_Func]]) + ) + + # Then modified input/output flows for nuclear and elec/heat/chp plants + modified_flows <- temp |> dplyr::select(-dplyr::any_of(share_elect_output_From_Func)) %>% - tidyr::pivot_longer(cols = c({electricity}, {heat}, {nuclear}, {Electricity_Nuclear}, {Heat_Nuclear}), values_to = {e_dot}, names_to = {product}) %>% + tidyr::pivot_longer(cols = c({electricity}, {heat}, {nuclear}, glue::glue("{electricity}_{nuclear}"), glue::glue("{heat}_{nuclear}")), values_to = {e_dot}, names_to = {product}) %>% dplyr::filter(.data[[e_dot]] != 0) %>% dplyr::mutate( 
"{flow}" := dplyr::case_when( @@ -739,19 +838,64 @@ add_nuclear_industry <- function(.tidy_iea_df, "{product}" := stringr::str_remove(.data[[product]], stringr::str_c("_", nuclear)) ) + # Dealing with EIOU flows + eiou_elec_heat_CHP_plants <- .tidy_iea_df |> + dplyr::filter(.data[[flow]] == own_use_elect_chp_heat & .data[[flow_aggregation_point]] == eiou) + + # First case, we don't do anything + if (isFALSE(ascribe_eiou_to_nuclear)){ + modified_flows <- modified_flows |> + dplyr::bind_rows(eiou_elec_heat_CHP_plants) + # Second case, we determine the share of the output supplied by nuclear plants, + # and ascribe the corresponding EIOU to nuclear plants + } else if(isTRUE(ascribe_eiou_to_nuclear)){ + + # Share nuclear output + share_nuclear_output_df <- temp |> + dplyr::group_by(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]], .data[[unit]]) |> + dplyr::summarise(dplyr::across(tidyselect::any_of(c(electricity, heat, nuclear, glue::glue("{electricity}_{nuclear}"), glue::glue("{heat}_{nuclear}"))), sum)) |> + dplyr::mutate( + "{share_nuclear_output}" := (.data[[glue::glue("{electricity}_{nuclear}")]] + .data[[glue::glue("{heat}_{nuclear}")]])/(.data[[electricity]] + .data[[heat]] + .data[[glue::glue("{electricity}_{nuclear}")]] + .data[[glue::glue("{heat}_{nuclear}")]]) + ) |> + dplyr::select(-tidyselect::any_of(c(share_elect_output_From_Func, electricity, heat, nuclear, glue::glue("{electricity}_{nuclear}"), glue::glue("{heat}_{nuclear}"), ledger_side, flow_aggregation_point, flow, product))) + + # Definining nuclear EIOU + nuclear_eiou <- eiou_elec_heat_CHP_plants |> + dplyr::left_join(share_nuclear_output_df, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + dplyr::mutate( + "{e_dot}" := .data[[e_dot]] * .data[[share_nuclear_output]], + "{flow}" := nuclear_industry + ) |> + dplyr::select(-tidyselect::any_of(c(share_nuclear_output))) + + # Defining elec/CHP/heat plants total EIOU + 
elec_chp_heat_plants_eiou <- eiou_elec_heat_CHP_plants |> + dplyr::left_join(share_nuclear_output_df, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + dplyr::mutate( + "{e_dot}" := .data[[e_dot]] * (1 - .data[[share_nuclear_output]]), + "{flow}" := own_use_elect_chp_heat + ) |> + dplyr::select(-tidyselect::any_of(c(share_nuclear_output))) + + # Adding modified EIOU flows to modified flows + modified_flows <- modified_flows |> + dplyr::bind_rows( + elec_chp_heat_plants_eiou, + nuclear_eiou + ) + } + # Builds output data frame by filtering out input data frame (take out modified flows), and collating modified data. to_return <- .tidy_iea_df %>% dplyr::filter( ! (.data[[flow_aggregation_point]] == transformation_processes & ((.data[[flow]] %in% c(main_act_producer_elect, autoproducer_elect) & .data[[product]] %in% c(nuclear, electricity)) | (.data[[flow]] %in% c(main_act_producer_chp, autoproducer_chp) & .data[[product]] %in% c(nuclear, electricity, heat)))) ) %>% + dplyr::filter(! (.data[[flow]] == own_use_elect_chp_heat & .data[[flow_aggregation_point]] == eiou)) |> dplyr::bind_rows( modified_flows ) %>% - - - dplyr::mutate( "{negzeropos}" := dplyr::case_when( .data[[e_dot]] < 0 ~ "neg", @@ -759,32 +903,631 @@ add_nuclear_industry <- function(.tidy_iea_df, .data[[e_dot]] > 0 ~ "pos" ) ) %>% + # Now sum similar rows using summarise. + # Group by everything except the energy flow rate column, "Edot". + matsindf::group_by_everything_except(e_dot) %>% + dplyr::summarise( + "{e_dot}" := sum(.data[[e_dot]]) + ) %>% + dplyr::mutate( + #Eliminate the column we added. + "{negzeropos}" := NULL + ) %>% + dplyr::ungroup() + + return(to_return) +} + + +#' Specifies renewable electricity and heat +#' +#' This function specifies hydro, geothermal, solar photovoltaic, solar thermal, oceanic, and wind power industries. 
+#' +#' The primary energy use of hydro, geothermal, solar photovoltaic, solar thermal, oceanic, and wind power energy by main activity and autoproducer plants are used +#' to create new renewable industries that produce electricity and heat (heat only in the case of geothermal and solar thermal). The physical content method is used +#' to derive the electricity produced by renewable industries, except in the case of geothermal and solar thermal, for which the IEA uses other factors in its balances. +#' In the case of CHP plants (which can be relevant for geothermal and solar thermal), the output of the new renewable industry follows the same heat vs electricity +#' breakdown as the main industry from which it is derived. +#' +#' @param .tidy_iea_df The `.tidy_iea_df` which flows need to be specified. +#' @param specify_renewable_plants A boolean indicating whether renewable energy plants should be specified or not. +#' Default is FALSE. +#' @param ascribe_eiou_to_renewable_plants A boolean defining whether a fraction of the EIOU of electricity, CHP and heat plants +#' should be ascribed to the new renewable industries. Default is FALSE. +#' @param flow_aggregation_point,flow,e_dot,product,method,ledger_side,last_stage,energy_type,country,year,unit See `IEATools::iea_cols`. +#' @param transformation_processes A string identifying the transformation processes in the `flow_aggregation_point` column in the `.tidy_iea_df`. +#' Default is `IEATools::aggregation_flows$transformation_processes`. +#' @param eiou A string identifying the energy industry own use in the `flow_aggregation_point` column in the `.tidy_iea_df`. +#' Default is `IEATools::aggregation_flows$energy_industry_own_use`. +#' @param main_act_producer_elect A string identifying "Main activity producer electricity plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::main_act_plants$main_act_prod_elect_plants`. 
+#' @param main_act_producer_chp A string identifying "Main activity producer CHP plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::main_act_plants$main_act_prod_chp_plants`. +#' @param main_act_producer_heat A string identifying "Main activity producer heat plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::main_act_plants$main_act_prod_heat_plants`. +#' @param autoproducer_elect A string identifying "Autoproducer electricity plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::main_act_plants$autoprod_elect_plants`. +#' @param autoproducer_chp A string identifying "Autoproducer CHP plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::transformation_processes$autoproducer_CHP_plants`. +#' @param autoproducer_heat A string identifying "Autoproducer heat plants" in the `flow` column of the `.tidy_iea_df`. +#' Default is `IEATools::main_act_plants$autoprod_heat_plants`. +#' @param own_use_elect_chp_heat A string identifying the "Own use in electricity, CHP and heat plants" EIOU flow in the `.tidy_iea_df`. +#' Default is `IEATools::eiou_flows$own_use_elect_chp_heat_plants`. +#' @param geothermal,hydro,solar_pv,solar_th,oceanic,wind Renewable energy product names. See `IEATools::renewable_products`. +#' @param electricity The name of the electricity product. +#' Default is `IEATools::electricity_products$electricity`. +#' @param heat The name of the heat product. +#' Default is `IEATools::heat_products$heat`. +#' @param ratio_solar_th_elec The ratio of electricity output to primary energy input to use for solar thermal. +#' Default is 0.33 as this is the value assumed in the IEA's energy balances. +#' @param ratio_solar_th_heat The ratio of heat output to primary energy input to use for solar thermal. +#' Default is 1 as this is the value assumed in the IEA's energy balances. +#' @param ratio_geothermal_elec The ratio of electricity output to primary energy input to use for geothermal.
+#' Default is 0.1 as this is the value assumed in the IEA's energy balances. +#' @param ratio_geothermal_heat The ratio of heat output to primary energy input to use for geothermal. +#' Default is 0.5 as this is the value assumed in the IEA's energy balances. +#' @param ratio_other_renewable_elec The ratio of electricity output to primary energy input to use for hydro, solar photovoltaic, oceanic, and wind power. +#' Default is 1 as this is the value assumed in the IEA's energy balances. +#' @param geothermal_plants,hydro_plants,solar_pv_plants,solar_th_plants,oceanic_plants,wind_power_plants Names of renewable industries added. See `IEATools::renewable_industries`. +#' @param negzeropos The name of a temporary column added to the data frame. +#' Default is ".negzeropos". +#' @param ratio_elec_to_heat The name of a temporary column added to the data frame, which holds the ratio of electricity output to heat output. +#' Default is ".ratio_elec_to_heat". +#' @param .share_industry The name of a temporary column added to specify the renewable industry +#' for which the share of output is calculated. Default is ".share_industry". +#' @param .share The name of a temporary column where the share of output is calculated for each renewable industry. +#' Default is ".share". +#' +#' @return Returns a `.tidy_iea_df` with renewable electricity and heat from geothermal, hydro, solar thermal, solar photovoltaic, wind, and oceanic power specified.
+#' @export +#' +#' @examples +#' library(dplyr) +#' load_tidy_iea_df() %>% +#' specify_renewable_plants() +specify_renewable_plants <- function(.tidy_iea_df, + specify_renewable_plants = FALSE, + ascribe_eiou_to_renewable_plants = FALSE, + # Column names + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + flow = IEATools::iea_cols$flow, + e_dot = IEATools::iea_cols$e_dot, + product = IEATools::iea_cols$product, + method = IEATools::iea_cols$method, + ledger_side = IEATools::iea_cols$ledger_side, + last_stage = IEATools::iea_cols$last_stage, + energy_type = IEATools::iea_cols$energy_type, + country = IEATools::iea_cols$country, + year = IEATools::iea_cols$year, + unit = IEATools::iea_cols$unit, + # Strings identifying flows, ledger sides, flow aggregation points, and products + eiou = IEATools::aggregation_flows$energy_industry_own_use, + transformation_processes = IEATools::aggregation_flows$transformation_processes, + main_act_producer_elect = IEATools::main_act_plants$main_act_prod_elect_plants, + main_act_producer_chp = IEATools::main_act_plants$main_act_prod_chp_plants, + main_act_producer_heat = IEATools::main_act_plants$main_act_prod_heat_plants, + autoproducer_elect = IEATools::main_act_plants$autoprod_elect_plants, + autoproducer_chp = IEATools::transformation_processes$autoproducer_CHP_plants, + autoproducer_heat = IEATools::main_act_plants$autoprod_heat_plants, + own_use_elect_chp_heat = IEATools::eiou_flows$own_use_elect_chp_heat_plants, + # Input products + geothermal = IEATools::renewable_products$geothermal, + hydro = IEATools::renewable_products$hydro, + solar_pv = IEATools::renewable_products$solar_photovoltaics, + solar_th = IEATools::renewable_products$solar_thermal, + oceanic = IEATools::renewable_products$tide_wave_and_ocean, + wind = IEATools::renewable_products$wind, + # Output products + electricity = IEATools::electricity_products$electricity, + heat = IEATools::heat_products$heat, + # Ratios of final to primary energy + 
ratio_solar_th_elec = 0.33, + ratio_solar_th_heat = 1, + ratio_geothermal_elec = 0.1, + ratio_geothermal_heat = 0.5, + ratio_other_renewable_elec = 1, + # New industry names + geothermal_plants = IEATools::renewable_industries$geothermal_plants, + hydro_plants = IEATools::renewable_industries$hydro_plants, + solar_pv_plants = IEATools::renewable_industries$solar_pv_plants, + solar_th_plants = IEATools::renewable_industries$solar_th_plants, + oceanic_plants = IEATools::renewable_industries$oceanic_plants, + wind_power_plants = IEATools::renewable_industries$wind_power_plants, + # Strings identifying temporary column names + negzeropos = ".negzeropos", + ratio_elec_to_heat = ".ratio_elec_to_heat", + .share_industry = ".share_industry", + .share = ".share"){ + + # Check if renewable energy should be specified. If yes, then the code carries on. + if (isFALSE(specify_renewable_plants)){ + return(.tidy_iea_df) + } + + # Tibble of products of interest + products_tibble <- tibble::tibble("{geothermal}" := NA, + "{hydro}" := NA, + "{solar_pv}" := NA, + "{solar_th}" := NA, + "{oceanic}" := NA, + "{wind}" := NA, + "{electricity}" := NA, + "{heat}" := NA) + + # Potentially move to using the IEATools constant, if "Other sources" are removed + renewable_products <- c(geothermal, hydro, solar_pv, solar_th, oceanic, wind) + + # Relevant products + relevant_products <- c(geothermal, hydro, solar_pv, solar_th, oceanic, wind, electricity, heat) + + # (1) Here we select only the flows that we are going to modify, and pivot them to wide format for modification + selected_io_flows <- .tidy_iea_df %>% + dplyr::filter( + .data[[flow_aggregation_point]] == transformation_processes & + ((.data[[flow]] %in% c(main_act_producer_elect, autoproducer_elect) & .data[[product]] %in% c(renewable_products, electricity)) | + (.data[[flow]] %in% c(main_act_producer_chp, autoproducer_chp) & .data[[product]] %in% c(renewable_products, electricity, heat)) | + (.data[[flow]] %in% c(main_act_producer_heat, 
autoproducer_heat) & .data[[product]] %in% c(renewable_products, heat))) + ) %>% + # tidyr::pivot_wider(names_from = .data[[product]], values_from = .data[[e_dot]]) %>% + tidyr::pivot_wider(names_from = dplyr::all_of(product), values_from = dplyr::all_of(e_dot)) %>% + # dplyr::select(-tidyselect::any_of({e_dot})) + dplyr::select(-tidyselect::any_of(e_dot)) + + # (2.a) Select names of wide data frame just built, so we can add missing products as additional columns + names_selected_io_flows <- names(selected_io_flows) + + # (2.b) Modify selected flows + # (i) Temporary df to help specifying EIOU flows after + temp <- selected_io_flows %>% + tibble::add_column(!!products_tibble[! names(products_tibble) %in% names_selected_io_flows]) %>% + # Replacing NAs by zeros in all columns + dplyr::mutate(dplyr::across(tidyselect::all_of(relevant_products), ~tidyr::replace_na(.x, 0))) |> + # Defining renewable electricity for products for which all inputs deliver electricity + dplyr::mutate( + "{hydro}_{electricity}" := -.data[[hydro]] * ratio_other_renewable_elec, + "{solar_pv}_{electricity}" := -.data[[solar_pv]] * ratio_other_renewable_elec, + "{oceanic}_{electricity}" := -.data[[oceanic]] * ratio_other_renewable_elec, + "{wind}_{electricity}" := -.data[[wind]] * ratio_other_renewable_elec, + ) |> + # Defining renewable electricity and heat for products with potential joint production + dplyr::mutate( + "{ratio_elec_to_heat}" := .data[[electricity]] / .data[[heat]], + "{geothermal}_{electricity}" := dplyr::case_match( + .data[[ratio_elec_to_heat]], + Inf ~ -(.data[[geothermal]] * ratio_geothermal_elec), + 0 ~ 0, + .default = -(.data[[geothermal]]) / (1 + ratio_geothermal_elec/(ratio_geothermal_heat * .data[[ratio_elec_to_heat]])) * ratio_geothermal_elec + ), + "{geothermal}_{heat}" := dplyr::case_match( + .data[[ratio_elec_to_heat]], + Inf ~ 0, + 0 ~ -.data[[geothermal]] * ratio_geothermal_heat, + .default = -(.data[[geothermal]]) / (1 + 
ratio_geothermal_heat/ratio_geothermal_elec*.data[[ratio_elec_to_heat]]) * ratio_geothermal_heat + ), + "{solar_th}_{electricity}" := dplyr::case_match( + .data[[ratio_elec_to_heat]], + Inf ~ -(.data[[solar_th]] * ratio_solar_th_elec), + 0 ~ 0, + .default = -(.data[[solar_th]]) / (1 + ratio_solar_th_elec/(ratio_solar_th_heat * .data[[ratio_elec_to_heat]])) * ratio_solar_th_elec + ), + "{solar_th}_{heat}" := dplyr::case_match( + .data[[ratio_elec_to_heat]], + Inf ~ 0, + 0 ~ -.data[[solar_th]] * ratio_solar_th_heat, + .default = -(.data[[solar_th]]) / (1 + ratio_solar_th_heat/ratio_solar_th_elec*.data[[ratio_elec_to_heat]]) * ratio_solar_th_heat + ), + ) + + # (ii) Subtracting specified electricity and heat flows from existing plants output; specifying product output + modified_flows <- temp |> + dplyr::mutate( + "{electricity}" := .data[[electricity]] - (.data[[glue::glue("{hydro}_{electricity}")]] + .data[[glue::glue("{solar_pv}_{electricity}")]] + .data[[glue::glue("{oceanic}_{electricity}")]] + + .data[[glue::glue("{wind}_{electricity}")]] + .data[[glue::glue("{geothermal}_{electricity}")]] + .data[[glue::glue("{solar_th}_{electricity}")]]), + "{heat}" := .data[[heat]] - (.data[[glue::glue("{geothermal}_{heat}")]] + .data[[glue::glue("{solar_th}_{heat}")]]) + ) |> + # Removing columns if needed + dplyr::select(-dplyr::any_of(ratio_elec_to_heat)) %>% + # Back to tidy, long format + tidyr::pivot_longer(cols = -c({country}, {method}, {energy_type}, {last_stage}, {year}, {ledger_side}, {flow_aggregation_point}, {flow}, {unit}), + values_to = {e_dot}, names_to = {product}) |> + dplyr::filter(.data[[e_dot]] != 0) %>% + # Adjusting product and flow names: + dplyr::mutate( + "{flow}" := dplyr::case_when( + stringr::str_detect(.data[[product]], geothermal) ~ IEATools::renewable_industries$geothermal_plants, + stringr::str_detect(.data[[product]], hydro) ~ IEATools::renewable_industries$hydro_plants, + stringr::str_detect(.data[[product]], solar_pv) ~ 
IEATools::renewable_industries$solar_pv_plants, + stringr::str_detect(.data[[product]], solar_th) ~ IEATools::renewable_industries$solar_th_plants, + stringr::str_detect(.data[[product]], oceanic) ~ IEATools::renewable_industries$oceanic_plants, + stringr::str_detect(.data[[product]], wind) ~ IEATools::renewable_industries$wind_power_plants, + TRUE ~ .data[[flow]] + ), + "{product}" := stringr::str_remove(.data[[product]], ".*_") + ) + + # (3) Dealing with EIOU flows + eiou_elec_heat_CHP_plants <- .tidy_iea_df |> + dplyr::filter(.data[[flow]] == own_use_elect_chp_heat & .data[[flow_aggregation_point]] == eiou) + + # (i) First case, we don't do anything + if (isFALSE(ascribe_eiou_to_renewable_plants)){ + modified_flows <- modified_flows |> + dplyr::bind_rows(eiou_elec_heat_CHP_plants) + # (ii) Second case, we determine the share of the output supplied by each renewable energy industry, + # and ascribe the corresponding EIOU to each renewable energy industry + } else if(isTRUE(ascribe_eiou_to_renewable_plants)){ + + # Defining a vector of products of interest + products_of_interest <- c(electricity, heat, geothermal, hydro, solar_pv, solar_th, oceanic, wind, + glue::glue("{hydro}_{electricity}"), glue::glue("{solar_pv}_{electricity}"), glue::glue("{oceanic}_{electricity}"), glue::glue("{wind}_{electricity}"), + glue::glue("{geothermal}_{electricity}"), glue::glue("{geothermal}_{heat}"), glue::glue("{solar_th}_{electricity}"), glue::glue("{solar_th}_{heat}")) + # Share each renewable energy plant to total elec/chp/heat plants output + share_renewable_output_df <- temp |> + dplyr::group_by(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]], .data[[unit]]) |> + dplyr::summarise(dplyr::across(tidyselect::any_of(products_of_interest), sum)) |> + dplyr::mutate( + "{.share}_{geothermal_plants}" := (.data[[glue::glue("{geothermal}_{electricity}")]] + .data[[glue::glue("{geothermal}_{heat}")]])/(.data[[electricity]] + .data[[heat]]), + 
"{.share}_{hydro_plants}" := (.data[[glue::glue("{hydro}_{electricity}")]])/(.data[[electricity]] + .data[[heat]]), + "{.share}_{solar_pv_plants}" := (.data[[glue::glue("{solar_pv}_{electricity}")]])/(.data[[electricity]] + .data[[heat]]), + "{.share}_{solar_th_plants}" := (.data[[glue::glue("{solar_th}_{electricity}")]] + .data[[glue::glue("{solar_th}_{heat}")]])/(.data[[electricity]] + .data[[heat]]), + "{.share}_{oceanic_plants}" := (.data[[glue::glue("{oceanic}_{electricity}")]])/(.data[[electricity]] + .data[[heat]]), + "{.share}_{wind_power_plants}" := (.data[[glue::glue("{wind}_{electricity}")]])/(.data[[electricity]] + .data[[heat]]), + ) |> + dplyr::select(-tidyselect::any_of(c(ratio_elec_to_heat, products_of_interest, ledger_side, flow_aggregation_point, flow, product))) + # Defining shares of interest + shares_of_interest <- c(glue::glue("{.share}_{geothermal_plants}"), glue::glue("{.share}_{hydro_plants}"), glue::glue("{.share}_{solar_pv_plants}"), + glue::glue("{.share}_{solar_th_plants}"), glue::glue("{.share}_{oceanic_plants}"), glue::glue("{.share}_{wind_power_plants}")) + # Defining renewable industry EIOU + renewable_industry_eiou <- eiou_elec_heat_CHP_plants |> + dplyr::left_join(share_renewable_output_df, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + tidyr::pivot_longer(cols = tidyselect::any_of(shares_of_interest), names_to = .share_industry, values_to = .share) |> + dplyr::mutate( + "{e_dot}" := .data[[e_dot]] * .data[[.share]], + "{flow}" := stringr::str_extract(.data[[.share_industry]], "_.*") |> + stringr::str_remove("_") + ) |> + dplyr::select(-tidyselect::any_of(c(.share, .share_industry))) + # Defining elec/CHP/heat plants total EIOU + elec_chp_heat_plants_eiou <- eiou_elec_heat_CHP_plants |> + dplyr::left_join(share_renewable_output_df, by = c({country}, {method}, {energy_type}, {last_stage}, {year}, {unit})) |> + dplyr::mutate( + "{e_dot}" := .data[[e_dot]] * (1 - 
(.data[[glue::glue("{.share}_{geothermal_plants}")]]+.data[[glue::glue("{.share}_{hydro_plants}")]]+.data[[glue::glue("{.share}_{solar_pv_plants}")]] + +.data[[glue::glue("{.share}_{solar_th_plants}")]]+.data[[glue::glue("{.share}_{oceanic_plants}")]]+.data[[glue::glue("{.share}_{wind_power_plants}")]])), + "{flow}" := own_use_elect_chp_heat + ) |> + dplyr::select(-dplyr::starts_with(.share)) + + # Adding modified EIOU flows to modified flows + modified_flows <- modified_flows |> + dplyr::bind_rows( + elec_chp_heat_plants_eiou, + renewable_industry_eiou + ) + } + + # (4) Builds output data frame by filtering out input data frame (take out modified flows), and collating modified data. + to_return <- .tidy_iea_df %>% + # Inverse of the condition that was filtered in "modified_flows" + dplyr::filter( + ! (.data[[flow_aggregation_point]] == transformation_processes & + ((.data[[flow]] %in% c(main_act_producer_elect, autoproducer_elect) & .data[[product]] %in% c(renewable_products, electricity)) | + (.data[[flow]] %in% c(main_act_producer_chp, autoproducer_chp) & .data[[product]] %in% c(renewable_products, electricity, heat)) | + (.data[[flow]] %in% c(main_act_producer_heat, autoproducer_heat) & .data[[product]] %in% c(renewable_products, heat)))) + ) %>% + dplyr::filter(! (.data[[flow]] == own_use_elect_chp_heat & .data[[flow_aggregation_point]] == eiou)) |> + dplyr::bind_rows( + modified_flows + ) %>% + dplyr::mutate( + "{negzeropos}" := dplyr::case_when( + .data[[e_dot]] < 0 ~ "neg", + .data[[e_dot]] == 0 ~ "zero", + .data[[e_dot]] > 0 ~ "pos" + ) + ) %>% # Now sum similar rows using summarise. - # Group by everything except the energy flow rate column, "E.dot". + # Group by everything except the energy flow rate column, "Edot". matsindf::group_by_everything_except(e_dot) %>% dplyr::summarise( "{e_dot}" := sum(.data[[e_dot]]) ) %>% - - - dplyr::mutate( #Eliminate the column we added. 
"{negzeropos}" := NULL ) %>% - - - dplyr::ungroup() return(to_return) } +#' Specifies electricity grid +#' +#' Adds an electricity grid industry that takes as input all electricity produced by any industry, +#' which is now specified by producing industry (e.g., "Electricity \[from Wind power plants\]"), +#' and converts it into Electricity. +#' +#' @param .tidy_iea_df The `.tidy__iea_df` for which an electricity grid industry should be added. +#' @param specify_electricity_grid A boolean stating whether an electricity grid industry should be created or not. +#' Default is FALSE. +#' @param supplying_industry_notation Notation to use to specify the electricity supplying industry. +#' Default is `RCLabels::from_notation`. +#' @param flow_aggregation_point,flow,e_dot,product,method,ledger_side,last_stage,energy_type,country,year,unit See `IEATools::iea_cols`. +#' @param losses The name of the "Losses" flows in the input data frame. +#' Default is `IEATools::tfc_compare_flows$losses`. +#' @param grid_industry The name of the electricity grid industry to be added. +#' Default is `IEATools::grid_industries$electricity_grid`. +#' @param supply The name of the supply ledger side. +#' Default is `IEATools::ledger_sides$supply`. +#' @param transformation_processes The name of transformation processes in the flow aggregation point column. +#' Default is `IEATools::tfc_compare_flows$transformation_processes`. +#' @param electricity The name of the product name for "Electricity". +#' Default is `IEATools::electricity_products$electricity`. +#' @param negzeropos The name of a temporary column added to the data frame. +#' Default is ".negzeropos". +#' +#' @return The `.tidy__iea_df` to which an electricity grid industry has been added. 
#' Specifies electricity grid
#'
#' Inserts a single electricity grid industry into the energy conversion chain.
#' Every positive, supply-side flow of electricity is relabelled with its producing industry
#' (e.g., "Electricity \[from Wind power plants\]") and fed into the grid industry,
#' which in turn supplies plain "Electricity", net of the electricity "Losses" flows.
#' The original electricity production and losses rows are removed from the result.
#'
#' When `specify_electricity_grid` is `FALSE` (the default), `.tidy_iea_df` is returned untouched.
#'
#' @param .tidy_iea_df The tidy IEA data frame to which an electricity grid industry should be added.
#' @param specify_electricity_grid A boolean stating whether an electricity grid industry should be created or not.
#'                                 Default is FALSE.
#' @param supplying_industry_notation Notation to use to specify the electricity supplying industry.
#'                                    Default is `RCLabels::from_notation`.
#' @param flow_aggregation_point,flow,e_dot,product,method,ledger_side,last_stage,energy_type,country,year,unit See `IEATools::iea_cols`.
#' @param losses The name of the "Losses" flows in the input data frame.
#'               Default is `IEATools::tfc_compare_flows$losses`.
#' @param grid_industry The name of the electricity grid industry to be added.
#'                      Default is `IEATools::grid_industries$electricity_grid`.
#' @param supply The name of the supply ledger side.
#'               Default is `IEATools::ledger_sides$supply`.
#' @param transformation_processes The name of transformation processes in the flow aggregation point column.
#'                                 Default is `IEATools::tfc_compare_flows$transformation_processes`.
#' @param electricity The name of the product name for "Electricity".
#'                    Default is `IEATools::electricity_products$electricity`.
#' @param negzeropos The name of a temporary column added to the data frame.
#'                   Default is ".negzeropos".
#'
#' @return The tidy IEA data frame to which an electricity grid industry has been added.
#' @export
#'
#' @examples
#' load_tidy_iea_df() %>%
#'   specify_electricity_grid()
specify_electricity_grid <- function(.tidy_iea_df,
                                     specify_electricity_grid = FALSE,
                                     supplying_industry_notation = RCLabels::from_notation,
                                     # IEA col names
                                     country = IEATools::iea_cols$country,
                                     method = IEATools::iea_cols$method,
                                     energy_type = IEATools::iea_cols$energy_type,
                                     last_stage = IEATools::iea_cols$last_stage,
                                     year = IEATools::iea_cols$year,
                                     ledger_side = IEATools::iea_cols$ledger_side,
                                     flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point,
                                     flow = IEATools::iea_cols$flow,
                                     product = IEATools::iea_cols$product,
                                     unit = IEATools::iea_cols$unit,
                                     e_dot = IEATools::iea_cols$e_dot,
                                     # Constants
                                     losses = IEATools::tfc_compare_flows$losses,
                                     grid_industry = IEATools::grid_industries$electricity_grid,
                                     supply = IEATools::ledger_sides$supply,
                                     transformation_processes = IEATools::tfc_compare_flows$transformation_processes,
                                     electricity = IEATools::electricity_products$electricity,
                                     # Strings identifying temporary column names
                                     negzeropos = ".negzeropos"){

  # Guard clause: leave the data alone unless a grid was explicitly requested.
  if (isFALSE(specify_electricity_grid)) {
    return(.tidy_iea_df)
  }

  # Positive supply-side electricity flows are what the grid will collect.
  elec_production <- dplyr::filter(.tidy_iea_df,
                                   .data[[ledger_side]] == supply,
                                   .data[[e_dot]] > 0,
                                   .data[[product]] == electricity)

  # Electricity losses are netted against the grid's output.
  elec_losses <- dplyr::filter(.tidy_iea_df,
                               .data[[flow]] == losses,
                               .data[[product]] == electricity)

  # Producers now make "Electricity [from <producer>]" instead of plain electricity.
  elec_by_supplier <- dplyr::mutate(
    elec_production,
    "{product}" := stringr::str_c(.data[[product]],
                                  supplying_industry_notation[["suff_start"]],
                                  .data[[flow]],
                                  supplying_industry_notation[["suff_end"]])
  )

  # The grid consumes each producer-specific electricity product (hence the sign flip).
  grid_inputs <- dplyr::mutate(
    elec_by_supplier,
    "{flow}" := grid_industry,
    "{e_dot}" := - .data[[e_dot]]
  )

  # The grid supplies plain electricity: total production plus (negative) losses.
  grid_supply <- dplyr::bind_rows(elec_production, elec_losses) %>%
    dplyr::group_by(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]],
                    .data[[year]], .data[[product]], .data[[unit]]) %>%
    dplyr::summarise(
      "{e_dot}" := sum(.data[[e_dot]])
    ) %>%
    dplyr::mutate(
      "{flow_aggregation_point}" := transformation_processes,
      "{ledger_side}" := supply,
      "{flow}" := grid_industry
    )

  # Swap the original production and losses rows for the new ones, then
  # aggregate identical rows. The temporary sign column keeps positive and
  # negative flows apart so that they are not netted against each other.
  .tidy_iea_df %>%
    dplyr::filter(
      ! (.data[[ledger_side]] == supply & .data[[e_dot]] > 0 & .data[[product]] == electricity),
      ! (.data[[flow]] == losses & .data[[product]] == electricity)
    ) %>%
    dplyr::bind_rows(elec_by_supplier, grid_inputs, grid_supply) %>%
    dplyr::mutate(
      "{negzeropos}" := dplyr::case_when(
        .data[[e_dot]] > 0 ~ "pos",
        .data[[e_dot]] < 0 ~ "neg",
        .data[[e_dot]] == 0 ~ "zero"
      )
    ) %>%
    # Group by everything except the energy flow rate column, then sum.
    matsindf::group_by_everything_except(e_dot) %>%
    dplyr::summarise(
      "{e_dot}" := sum(.data[[e_dot]])
    ) %>%
    dplyr::ungroup() %>%
    # The sign column was only needed for grouping.
    dplyr::mutate(
      "{negzeropos}" := NULL
    )
}
#' Specifies distribution industries
#'
#' For each product where losses are reported, creates a distribution industry which
#' is specified by product (e.g., "Distribution \[of Heat\]"),
#' and converts a product from a specific origin (e.g., "Heat \[from Oil refineries\]")
#' into the final demand product (e.g., "Heat").
#'
#' A distribution industry is created only for combinations of
#' country, method, energy type, last stage, year, and product
#' for which a losses flow is actually present in `.tidy_iea_df`.
#' The supply of each distribution industry is the sum of the positive supply-side flows
#' of the product and the (negative) losses of that product.
#' The original production and losses rows are removed from the result.
#'
#' When `specify_distribution_industries` is `FALSE` (the default),
#' `.tidy_iea_df` is returned unmodified.
#'
#' @param .tidy_iea_df The tidy IEA data frame to which distribution industries should be added.
#' @param specify_distribution_industries A boolean stating whether distribution industries should be added or not.
#'                                        Default is FALSE.
#' @param supplying_industry_notation Notation to use to specify the supplying industries.
#'                                    Default is `RCLabels::from_notation`.
#' @param distribution_industry_notation Notation to use to specify the distribution industries.
#'                                       Default is `RCLabels::of_notation`.
#' @param flow_aggregation_point,flow,e_dot,product,method,ledger_side,last_stage,energy_type,country,year,unit See `IEATools::iea_cols`.
#' @param losses The name of the "Losses" flows in the input data frame.
#'               Default is `IEATools::tfc_compare_flows$losses`.
#' @param distribution_industry The name of the distribution industries to be added.
#'                              Default is `IEATools::distribution_industry`.
#' @param supply The name of the supply ledger side.
#'               Default is `IEATools::ledger_sides$supply`.
#' @param transformation_processes The name of transformation processes in the flow aggregation point column.
#'                                 Default is `IEATools::tfc_compare_flows$transformation_processes`.
#' @param negzeropos The name of a temporary column added to the data frame.
#'                   Default is ".negzeropos".
#'
#' @return The tidy IEA data frame to which distribution industries have been added.
#' @export
#'
#' @examples
#' load_tidy_iea_df() %>%
#'   specify_distribution_losses()
specify_distribution_losses <- function(.tidy_iea_df,
                                        specify_distribution_industries = FALSE,
                                        supplying_industry_notation = RCLabels::from_notation,
                                        distribution_industry_notation = RCLabels::of_notation,
                                        # IEA col names
                                        country = IEATools::iea_cols$country,
                                        method = IEATools::iea_cols$method,
                                        energy_type = IEATools::iea_cols$energy_type,
                                        last_stage = IEATools::iea_cols$last_stage,
                                        year = IEATools::iea_cols$year,
                                        ledger_side = IEATools::iea_cols$ledger_side,
                                        flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point,
                                        flow = IEATools::iea_cols$flow,
                                        product = IEATools::iea_cols$product,
                                        unit = IEATools::iea_cols$unit,
                                        e_dot = IEATools::iea_cols$e_dot,
                                        # Constants
                                        losses = IEATools::tfc_compare_flows$losses,
                                        distribution_industry = IEATools::distribution_industry,
                                        supply = IEATools::ledger_sides$supply,
                                        transformation_processes = IEATools::tfc_compare_flows$transformation_processes,
                                        # Strings identifying temporary column names
                                        negzeropos = ".negzeropos"){

  # Check if distribution industries should be specified. If yes, then the code carries on.
  if (isFALSE(specify_distribution_industries)){
    return(.tidy_iea_df)
  }

  # Columns that identify one observation (Country, Year, Product + other metadata).
  observation_cols <- c(country, method, energy_type, last_stage, year, product)

  # (1) Select losses flows
  selected_losses_flows <- .tidy_iea_df |>
    dplyr::filter(.data[[flow]] == losses)

  # (2) Pick up the observations where losses actually occur.
  #     distinct() keeps only the combinations present in the data.
  #     (tidyr::expand() would return the full cross product of the columns,
  #     so a product with losses in one country would be treated as having
  #     losses in every country and year of the data frame.)
  losses_observations_list <- selected_losses_flows |>
    dplyr::select(dplyr::all_of(observation_cols)) |>
    dplyr::distinct()

  # (3) Select production flows of the products for which losses are reported
  selected_production_flows <- .tidy_iea_df |>
    dplyr::inner_join(losses_observations_list, by = observation_cols) |>
    dplyr::filter(.data[[ledger_side]] == supply & .data[[e_dot]] > 0)

  # (4) All selected flows; these rows will be replaced in the outgoing data frame.
  all_selected_flows <- dplyr::bind_rows(selected_losses_flows, selected_production_flows)

  # (5) Modify production flows: the product becomes "Product [from Industry]".
  modified_production_flows <- selected_production_flows |>
    dplyr::mutate(
      "{product}" := stringr::str_c(.data[[product]],
                                    supplying_industry_notation[["suff_start"]],
                                    .data[[flow]],
                                    supplying_industry_notation[["suff_end"]],
                                    sep = "")
    )

  # (6) Adding inputs to distribution industries.
  #     The distribution industry is named after the unspecified product
  #     (the prefix of "Product [from Industry]"), and the sign is flipped
  #     because the industry consumes the specified product.
  inputs_to_distribution_industries <- modified_production_flows |>
    dplyr::mutate(
      "{flow}" := stringr::str_c(distribution_industry,
                                 distribution_industry_notation[["suff_start"]],
                                 RCLabels::get_pref_suff(.data[[product]], which = "pref", notation = supplying_industry_notation),
                                 distribution_industry_notation[["suff_end"]]),
      "{e_dot}" := - .data[[e_dot]]
    )

  # (7) Adding supply of the new distribution industries:
  #     production plus (negative) losses, per observation and unit.
  supply_by_distribution_industries <- selected_production_flows |>
    dplyr::bind_rows(selected_losses_flows) |>
    dplyr::group_by(.data[[country]], .data[[method]], .data[[energy_type]], .data[[last_stage]], .data[[year]], .data[[product]], .data[[unit]]) |>
    dplyr::summarise(
      "{e_dot}" := sum(.data[[e_dot]]),
      # Drop the grouping explicitly; this avoids residual groups and the summarise() message.
      .groups = "drop"
    ) |>
    dplyr::mutate(
      "{flow_aggregation_point}" := transformation_processes,
      "{ledger_side}" := supply,
      "{flow}" := stringr::str_c(distribution_industry,
                                 distribution_industry_notation[["suff_start"]],
                                 .data[[product]],
                                 distribution_industry_notation[["suff_end"]])
    )

  # (8) Bind data frame and get ready to return values
  to_return <- .tidy_iea_df |>
    # Remove exactly the rows that were selected above.
    # The product column must be part of the key: without it, an untouched row
    # that shares flow, unit, and energy flow rate with a selected row of
    # another product would be removed, too.
    dplyr::anti_join(all_selected_flows,
                     by = c(country, method, energy_type, last_stage, year,
                            ledger_side, flow_aggregation_point, flow, product, unit, e_dot)) |>
    dplyr::bind_rows(
      modified_production_flows,
      inputs_to_distribution_industries,
      supply_by_distribution_industries
    ) |>
    # The temporary sign column keeps positive and negative flows apart,
    # otherwise the aggregation below would give only net flows.
    dplyr::mutate(
      "{negzeropos}" := dplyr::case_when(
        .data[[e_dot]] < 0 ~ "neg",
        .data[[e_dot]] == 0 ~ "zero",
        .data[[e_dot]] > 0 ~ "pos"
      )
    ) %>%
    # Now sum similar rows using summarise.
    # Group by everything except the energy flow rate column, "Edot".
    matsindf::group_by_everything_except(e_dot) %>%
    dplyr::summarise(
      "{e_dot}" := sum(.data[[e_dot]])
    ) %>%
    dplyr::mutate(
      #Eliminate the column we added.
      "{negzeropos}" := NULL
    ) %>%
    dplyr::ungroup()

  return(to_return)
}
# Now sum similar rows using summarise. - # Group by everything except the energy flow rate column, "E.dot". + # Group by everything except the energy flow rate column, "Edot". matsindf::group_by_everything_except(e_dot) %>% dplyr::summarise( "{e_dot}" := sum(.data[[e_dot]]) diff --git a/R/tables.R b/R/tables.R index b21c0d99..a842e41a 100644 --- a/R/tables.R +++ b/R/tables.R @@ -109,8 +109,8 @@ tidy_fu_allocation_table <- function(.fu_allocation_table, #' # Allocations for Residential consumption of PSBs will be picked up from the exemplar, South Africa. #' fu_table_GHA <- fu_table %>% #' dplyr::filter(Country == "GHA") %>% -#' dplyr::filter(!(Flow.aggregation.point == IEATools::tfc_flows$other & -#' Ef.product == IEATools::biofuels_and_waste_products$primary_solid_biofuels & +#' dplyr::filter(!(FlowAggregationPoint == IEATools::tfc_flows$other & +#' EfProduct == IEATools::biofuels_and_waste_products$primary_solid_biofuels & #' Destination == IEATools::other_flows$residential)) #' # Make the exemplar, South Africa. #' fu_table_ZAF <- fu_table %>% @@ -118,10 +118,10 @@ tidy_fu_allocation_table <- function(.fu_allocation_table, #' # The South African data have Residential PSB consumption, #' # which will be used to complete the Ghanaian FU Allocation table. #' fu_table_ZAF %>% -#' dplyr::filter(Flow.aggregation.point == IEATools::tfc_flows$other & -#' Ef.product == IEATools::biofuels_and_waste_products$primary_solid_biofuels & +#' dplyr::filter(FlowAggregationPoint == IEATools::tfc_flows$other & +#' EfProduct == IEATools::biofuels_and_waste_products$primary_solid_biofuels & #' Destination == IEATools::other_flows$residential) %>% -#' dplyr::select(!c(Method, Energy.type, Last.stage, Flow.aggregation.point)) +#' dplyr::select(!c(Method, EnergyType, LastStage, FlowAggregationPoint)) #' # Get the IEA data for GHA and ZAF and specify it. 
#' tidy_specified_iea_data <- load_tidy_iea_df() %>% #' specify_all() @@ -132,10 +132,10 @@ tidy_fu_allocation_table <- function(.fu_allocation_table, #' tidy_specified_iea_data = tidy_specified_iea_data) #' # Note that the C_source column shows that these data have been taken from South Africa. #' completed %>% -#' dplyr::filter(Flow.aggregation.point == IEATools::tfc_flows$other & -#' Ef.product == IEATools::biofuels_and_waste_products$primary_solid_biofuels & +#' dplyr::filter(FlowAggregationPoint == IEATools::tfc_flows$other & +#' EfProduct == IEATools::biofuels_and_waste_products$primary_solid_biofuels & #' Destination == IEATools::other_flows$residential) %>% -#' dplyr::select(!c(Method, Energy.type, Last.stage, Flow.aggregation.point)) +#' dplyr::select(!c(Method, EnergyType, LastStage, FlowAggregationPoint)) complete_fu_allocation_table <- function(fu_allocation_table, country_to_complete, exemplar_fu_allocation_tables, @@ -220,7 +220,8 @@ complete_fu_allocation_table <- function(fu_allocation_table, fu_allocation_table <- fu_allocation_table %>% dplyr::bind_rows(exemplar_rows_to_use) # Check to see if we have allocated everything - done <- fu_allocation_table_completed(fu_allocation_table, iea_rows_that_must_be_allocated) + done <- fu_allocation_table_completed(fu_allocation_table = fu_allocation_table, + specified_iea_data = iea_rows_that_must_be_allocated) if (done) { break } @@ -364,7 +365,7 @@ fu_allocation_table_completed <- function(fu_allocation_table = NULL, # Accept a non-tidy fu_allocation_table if it arrives. fu_allocation_table <- tidy_fu_allocation_table(fu_allocation_table) - # Eliminate the quantity, Machine, and Eu.product columns and summarize. + # Eliminate the quantity, Machine, and EuProduct columns and summarize. # We should get all 1's. # If not, throw an error. allocation_sums <- fu_allocation_table %>% @@ -667,8 +668,8 @@ complete_eta_fu_table <- function(eta_fu_table, # fu_allocation_table may come in with C_1 [%] etc. 
in the quantity column. # But it really needs eta.fu or phi.u, as required by the which_quantity argument. # for each unique combination of columns from - # Country, Year, Method, Energy.type, Last.stage, Flow.aggregation.point, - # Destination, Ef.product, Machine, and Eu.product. + # Country, Year, Method, EnergyType, LastStage, FlowAggregationPoint, + # Destination, EfProduct, Machine, and EuProduct. # Note that "quantities" here refers to eta_fu or phi_u. machines_that_need_quantities <- lapply(X = which_quantity, FUN = function(q){ fu_allocation_table %>% @@ -876,7 +877,7 @@ eta_fu_table_completed <- function(eta_fu_table = NULL, machines_that_need_quantities <- lapply(X = which_quantity, FUN = function(q){ machines_that_need_quantities %>% # Eliminate columns (if they exist) that contain unnecessary metadata - # associated with unique Country-Year-machine-EU.product combinations. + # associated with unique Country-Year-machine-EuProduct combinations. # These columns will interfere with the anti_join below. dplyr::mutate( "{method}" := NULL, diff --git a/R/templates.R b/R/templates.R index eaefcebc..b892200a 100644 --- a/R/templates.R +++ b/R/templates.R @@ -13,14 +13,14 @@ #' Non-energy use is removed from `.tidy_iea_df` before creating the template. #' #' @param .tidy_iea_df a tidy data frame containing IEA extended energy balance data -#' @param energy_type the name of the energy type column. Default is "Energy.type". +#' @param energy_type the name of the energy type column. Default is "EnergyType". #' @param energy the string identifier for energy (as opposed to exergy) in the `energy_type` column. Default is "`E`". -#' @param last_stage the name of the last stage column. Default is "Last.stage". +#' @param last_stage the name of the last stage column. Default is "LastStage". #' @param final the string identifier for final energy (as `Last.stage`). Default is "Final". #' @param year the name of the year column. Default is "Year". 
-#' @param ledger_side the name of the ledger side column. Default is "Ledger.side". +#' @param ledger_side the name of the ledger side column. Default is "LedgerSide". #' @param consumption the string identifier for the consumption side of the ledger. Default is "Consumption". -#' @param flow_aggregation_point the name of the flow aggregation point column. Default is "Flow.aggregation.point". +#' @param flow_aggregation_point the name of the flow aggregation point column. Default is FlowAggregationPoint. #' @param eiou the string identifier for energy industry own use in `flow_aggregation_point`. Default is "Energy industry own use". #' @param non_energy_use string identifier for non-energy use in `flow_aggregation_point`. Default is "Non-energy use". #' @param tfc the string identifier for total final consumption. Default is "Total final consumption". @@ -29,10 +29,10 @@ #' @param product the name of the product column. Default is "Product". #' @param destination the name for the destination column. Default is "Destination". #' @param quantity the name of the quantity column. Default is "Quantity". -#' @param e_dot the name of the energy flow rate column. Default is "E.dot". -#' @param e_dot_total the string identifier for total energy. Default is "E.dot.total". +#' @param e_dot the name of the energy flow rate column. Default is "Edot". +#' @param e_dot_total the string identifier for total energy. Default is "Edot.total". #' @param perc_unit_string the string used to indicate percentages. Default is "`[%]`". -#' @param e_dot_perc the string identifier for energy percentage. Default is "E.dot.perc". +#' @param e_dot_perc the string identifier for energy percentage. Default is "Edot.perc". #' @param maximum_values the name for the maximum energy values column. Default is "Maximum values". #' @param year_for_maximum_values an integer for the first year (in which maximum values will be stored before renaming the column to `maximum_values`). #' Default is `0`. 
@@ -56,33 +56,33 @@ #' specify_all() %>% #' fu_allocation_template() fu_allocation_template <- function(.tidy_iea_df, - energy_type = "Energy.type", - energy = "E", - last_stage = "Last.stage", - final = "Final", - year = "Year", - ledger_side = "Ledger.side", - consumption = "Consumption", - flow_aggregation_point = "Flow.aggregation.point", - eiou = "Energy industry own use", - non_energy_use = "Non-energy use", - tfc = "Total final consumption", - tpes = "Total primary energy supply", - flow = "Flow", - product = "Product", - destination = "Destination", - quantity = "Quantity", - e_dot = "E.dot", + energy_type = IEATools::iea_cols$energy_type, + energy = IEATools::energy_types$e, + last_stage = IEATools::iea_cols$last_stage, + final = IEATools::all_stages$final, + year = IEATools::iea_cols$year, + ledger_side = IEATools::iea_cols$ledger_side, + consumption = IEATools::ledger_sides$consumption, + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + eiou = IEATools::tfc_compare_flows$energy_industry_own_use, + non_energy_use = IEATools::tfc_flows$non_energy_use, + tfc = IEATools::aggregation_flows$total_final_consumption, + tpes = IEATools::tfc_compare_flows$total_primary_energy_supply, + flow = IEATools::iea_cols$flow, + product = IEATools::iea_cols$product, + destination = IEATools::template_cols$destination, + quantity = IEATools::template_cols$quantity, + e_dot = IEATools::iea_cols$e_dot, e_dot_total = paste0(e_dot, ".total"), perc_unit_string = "[%]", e_dot_perc = paste(e_dot, perc_unit_string), - maximum_values = "Maximum.values", + maximum_values = IEATools::template_cols$maximum_values, year_for_maximum_values = 0, - ef_product = "Ef.product", + ef_product = IEATools::template_cols$ef_product, allocation_var = "C_", n_allocation_rows = 4, - machine = "Machine", - eu_product = "Eu.product", + machine = IEATools::template_cols$machine, + eu_product = IEATools::template_cols$eu_product, arrange = TRUE, .value = ".value"){ 
matsindf::verify_cols_missing(.tidy_iea_df, .value) @@ -200,9 +200,9 @@ fu_allocation_template <- function(.tidy_iea_df, #' When sorting columns, the order of energy flows through the energy conversion chain is considered. #' The column order is: #' * metadata columns, -#' * final energy product (`Ef.product`). +#' * final energy product (`EfProduct`). #' * Machine (the final-to-useful transformation process), -#' * useful energy product (`Eu.product`), +#' * useful energy product (`EuProduct`), #' * destination where the useful energy now flows, #' * years (in columns), and #' * allocations (C_x rows). @@ -210,15 +210,15 @@ fu_allocation_template <- function(.tidy_iea_df, #' @param .fu_allocation_template the final-to-useful allocation template created by `fu_allocation_template()` #' @param rowcol one of "both", "row", or "col" to indicate whether rows, columns, or both should be arranged. #' Default is "both". -#' @param ledger_side the ledger side column in `.fu_allocation_template`. Default is "Ledger.side". -#' @param flow_aggregation_point the flow aggregation point column in `.fu_allocation_template`. Default is "Flow.aggregation.point". -#' @param ef_product the name of the final energy column in `.fu_allocation_template`. Default is "Ef.product". +#' @param ledger_side the ledger side column in `.fu_allocation_template`. Default is "LedgerSide". +#' @param flow_aggregation_point the flow aggregation point column in `.fu_allocation_template`. Default is FlowAggregationPoint. +#' @param ef_product the name of the final energy column in `.fu_allocation_template`. Default is `IEATools::template_cols$ef_product`. #' @param machine the name of the machine column in `.fu_allocation_template`. Default is "Machine". -#' @param eu_product the name of the useful energy product column in `.fu_allocation_template`. Default is "Eu.product". +#' @param eu_product the name of the useful energy product column in `.fu_allocation_template`. 
Default is `IEATools::template_cols$eu_product`. #' @param destination the name of the destination column in `.fu_allocation_template`. Default is "Destination". #' @param unit the name of the unit in `.fu_allocation_template`. Default is "Unit". #' @param fap_dest_order the desired order for the combination of `flow_aggregation_point` and `destination` columns. Default is `IEATools::fap_flow_iea_order`. -#' @param ef_product_order the desired order for final energy products in `.fu_allocation_template`. Default is "Ef.product". +#' @param ef_product_order the desired order for final energy products in `.fu_allocation_template`. Default is `IEATools::products`. #' @param quantity the name of the quantity column in `.fu_allocation_template`. Default is "Quantity". #' @param maximum_values the name of the maximum value column `.fu_allocation_template`. Default is "Unit". #' @param .temp_sort the name of a temporary column to be added to `.fu_allocation_template`. @@ -241,17 +241,17 @@ fu_allocation_template <- function(.tidy_iea_df, #' arrange_iea_fu_allocation_template() arrange_iea_fu_allocation_template <- function(.fu_allocation_template, rowcol = c("both", "row", "col"), - ledger_side = "Ledger.side", - flow_aggregation_point = "Flow.aggregation.point", - ef_product = "Ef.product", - machine = "Machine", - eu_product = "Eu.product", - destination = "Destination", - unit = "Unit", + ledger_side = IEATools::iea_cols$ledger_side, + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + ef_product = IEATools::template_cols$ef_product, + machine = IEATools::template_cols$machine, + eu_product = IEATools::template_cols$eu_product, + destination = IEATools::template_cols$destination, + unit = IEATools::iea_cols$unit, fap_dest_order = IEATools::fap_flows, ef_product_order = IEATools::products, - quantity = "Quantity", - maximum_values = "Maximum.values", + quantity = IEATools::template_cols$quantity, + maximum_values = 
IEATools::template_cols$maximum_values, .temp_sort = ".fap_flow", .clean_ef_product = ".clean_Ef_product"){ rowcol <- match.arg(rowcol) @@ -270,7 +270,7 @@ arrange_iea_fu_allocation_template <- function(.fu_allocation_template, matsindf::everything_except(c(year_colnames, machine_and_product_columns, ef_product)) # Adjust the columns in preparation for sorting. out <- out %>% - # De-specify the Ef.product column so it can be sorted. + # De-specify the EfProduct column so it can be sorted. despecify_col(col = ef_product, despecified_col = .clean_ef_product) %>% # Create a united Flow.aggregation.point_Flow column. tidyr::unite(col = !!as.name(.temp_sort), !!as.name(flow_aggregation_point), !!as.name(destination), sep = "_", remove = FALSE) @@ -373,19 +373,19 @@ arrange_iea_fu_allocation_template <- function(.fu_allocation_template, #' @param path the file path into which the blank template file will be written. #' Include both folder and file name. #' If not present, the ".xlsx" extension is added. -#' @param ledger_side the name of the ledger side column in `.tidy_iea_df`. Default is "Ledger.side". -#' @param consumption the string identifier for consumption in the `ledger_side` column. Default is "Consumption". -#' @param flow_aggregation_point the name of the flow aggregation point column in `.tidy_iea_df`. Default is "Flow.aggregation.point". -#' @param eiou the string identifier for energy industry own use in the `flow_aggregation_point` column. Default is "Energy industry own use". -#' @param fu_allocations_tab_name the name of the tab on which the template will be written. Default is "FU Allocations". -#' @param machine the name of the machine column in output. Default is "Machine" -#' @param eu_product the name of the useful energy product column in output. Default is "Eu.product". -#' @param quantity the name of the quantity column to be created on output. Default is "Quantity". +#' @param ledger_side the name of the ledger side column in `.tidy_iea_df`. 
Default is `IEATools::iea_cols$ledger_side`. +#' @param consumption the string identifier for consumption in the `ledger_side` column. Default is `IEATools::ledger_sides$consumption`. +#' @param flow_aggregation_point the name of the flow aggregation point column in `.tidy_iea_df`. Default is `IEATools::iea_cols$flow_aggregation_point`. +#' @param eiou the string identifier for energy industry own use in the `flow_aggregation_point` column. Default is `IEATools::tfc_compare_flows$energy_industry_own_use`. +#' @param fu_allocations_tab_name the name of the tab on which the template will be written. Default is `IEATools::fu_analysis_file_info$fu_allocation_tab_name`. +#' @param machine the name of the machine column in output. Default is `IEATools::template_cols$machine`. +#' @param eu_product the name of the useful energy product column in output. Default is `IEATools::template_cols$eu_product`. +#' @param quantity the name of the quantity column to be created on output. Default is `IEATools::template_cols$quantity`. #' @param e_dot the name of the energy flow rate column in `.tidy_iea_df` and the name of the energy flow rate rows to be included in the Excel file that is written by this function. -#' Default is "E.dot". +#' Default is "Edot". #' @param e_dot_perc the name of the energy flow rate percentage row to be included in the Excel file that is written by this function. -#' Default is "E.dot.perc". -#' @param maximum_values the name of the maximum values column in output. Default is "Maximum.values". +#' Default is "Edot.perc". +#' @param maximum_values the name of the maximum values column in output. Default is `IEATools::template_cols$maximum_values`. #' @param header_row_font_color a hex string representing the font color for the header row in the Excel file that is written by this function. #' Default is "#FFFFFF", white. 
#' @param header_row_shading_color a hex string representing the shading color for the header row in the Excel file that is written by this function. @@ -495,11 +495,11 @@ write_fu_allocation_template <- function(.fu_allocation_template, openxlsx::addStyle(fu_wb, fu_allocations_tab_name, style = energy_row_style_eiou, rows = union(e_dot_rows_eiou, e_dot_perc_rows_eiou), cols = 1:ncol(.fu_allocation_template), gridExpand = TRUE) # Apply shading for cells that don't need to be filled - # First, tackle the cells in the Maximum.values column. + # First, tackle the cells in the MaximumValues column. dont_fill_style <- openxlsx::createStyle(fgFill = dont_fill_shading_color) openxlsx::addStyle(fu_wb, fu_allocations_tab_name, style = dont_fill_style, rows = c_rows_indices, cols = max_values_col_index, gridExpand = TRUE) # Now work on the year columns. - # Find all the E.dot rows + # Find all the Edot rows for (yr_index in 1:length(year_cols_indices)) { col_index <- year_cols_indices[[yr_index]] col_name <- year_cols_names[[yr_index]] @@ -569,7 +569,7 @@ write_fu_allocation_template <- function(.fu_allocation_template, #' A filled example can be loaded with the default value of `path`. #' #' Note that any machine named `non_energy_machine` is required to have -#' identical values for `Ef.product` and `Eu.product`. +#' identical values for `ef_product` and `eu_product`. #' Violations of this requirement cause errors to be thrown. #' #' @param path The path from which final-to-useful allocation data will be loaded. Default is the path to allocation data supplied with this package. @@ -652,7 +652,7 @@ check_fu_allocation_data <- function(.fu_allocation_table, quantity = IEATools::template_cols$quantity, .values = IEATools::template_cols$.values, non_energy_machine = "Non-energy") { - # When "Non-energy" is the Machine, Ef.product and Eu.product should be identical. + # When "Non-energy" is the Machine, EfProduct and EuProduct should be identical. 
# It is an easy mistake that isn't true. # So check for that problem. errs <- .fu_allocation_table %>% @@ -675,12 +675,12 @@ check_fu_allocation_data <- function(.fu_allocation_table, stop(err_msg) } # When filling a final-to-useful allocation template, - # the analyst forgets to fill some Machines and Eu.products. + # the analyst forgets to fill some Machines and EuProducts. # Check for those situations and provide a helpful error message. # To check for these situations, we need to first tidy the FU allocations table tidy_fu <- .fu_allocation_table %>% tidy_fu_allocation_table() - # Now check for any cases where one or both of the Machine or Eu.product column is NA + # Now check for any cases where one or both of the Machine or EuProduct column is NA # while the .values column is not NA. errs <- tidy_fu %>% dplyr::filter((is.na(.data[[machine]]) | is.na(.data[[eu_product]])) & !is.na(.data[[.values]])) @@ -852,8 +852,8 @@ eta_fu_template <- function(.fu_allocations, # So we create e_dot_info from tidy_specified_iea_data. # The following modifications to tidy_specified_iea_data are needed. # * filter to contain only Consumption and EIOU - # * rename E.dot --> E.dot_dest - # * rename Product --> Ef.product + # * rename Edot --> Edot_dest + # * rename Product --> EfProduct # * rename Flow --> Destination e_dot_info <- tidy_specified_iea_data %>% dplyr::filter(.data[[country]] %in% countries) %>% @@ -889,7 +889,7 @@ eta_fu_template <- function(.fu_allocations, # To create the e_dot_machine_max_perc column, # we need to calculate the energy flowing into each f-->u machine. 
- # The first step is to isolate the E.dot rows + # The first step is to isolate the Edot rows e_dot_info <- .fu_allocations %>% dplyr::filter(!!as.name(quantity) == e_dot) %>% # dplyr::select(-maximum_values, -machine, -eu_product, -quantity) %>% @@ -924,7 +924,7 @@ eta_fu_template <- function(.fu_allocations, "{c_ratio}" := dplyr::all_of(c_perc) ) - # Now we join the E.dot and C values and calculate the energy flowing into each final-to-useful machine + # Now we join the Edot and C values and calculate the energy flowing into each final-to-useful machine input_energy <- dplyr::full_join(c_info, e_dot_info, by = matsindf::everything_except(c_info, machine, eu_product, c_ratio, .symbols = FALSE)) %>% # There may be cases where the analyst has filled a C value, but there is no corresponding e_dot_dest value. @@ -933,7 +933,7 @@ eta_fu_template <- function(.fu_allocations, dplyr::mutate( # Calculate the energy flow into each f-->u machine # for each row of the table - # (each combination of Ef.product and Machine. + # (each combination of EfProduct and Machine. "{e_dot_machine}" := .data[[c_ratio]] * .data[[e_dot_dest]] ) %>% # Group by the metadata columns, year, the Machine column, and the eu_product column, because we want to calculate the @@ -1005,7 +1005,7 @@ eta_fu_template <- function(.fu_allocations, ) %>% magrittr::extract2(.row_order) } else if (sort_by == "useful_energy_type") { - # We need to create a list of all the Eu.products. + # We need to create a list of all the EuProducts. eu_prods <- input_energy[[eu_product]] %>% unique() # Then find all the ones that are heat useful energy, identified by the 2nd and third characters being "TH". heat_prods <- eu_prods[which(substring(eu_prods, 2) %>% startsWith(heat))] @@ -1017,7 +1017,7 @@ eta_fu_template <- function(.fu_allocations, heat_prods_sorted <- heat_prods[sorted_heat_indices] # There may be useful products that we don't know about. Put those at the end, sorted in alphabetical order.. 
leftover_eu_prods <- sort(setdiff(eu_prods, c(md, ke, light, heat_prods))) - # Now compile the order of Eu.products for this data frame. + # Now compile the order of EuProducts for this data frame. eu_product_sort_order <- c(md, ke, light, heat_prods_sorted, leftover_eu_prods) # Sort the Maxima data frame to get the order we want. row_order <- Maxima %>% @@ -1105,10 +1105,10 @@ eta_fu_template <- function(.fu_allocations, #' @param eta_fu_tab_name the name of the final-to-useful efficiency tab. Default is "`r IEATools::fu_analysis_file_info$eta_fu_tab_name`". #' @param overwrite_file a logical telling whether to overwrite a file, if it already exists. Default is `FALSE`. #' @param overwrite_fu_eta_tab a logical telling whether to overwrite the final-to-useful efficiency tab, if it already exists. Default is `FALSE`. -#' @param eta_fu the name of the final-to-useful efficiency rows in `.eta_fu_template`. Default is "eta.fu". -#' @param e_dot_machine a string identifying energy flow into final-to-useful machines. Default is "E.dot_machine". -#' @param e_dot_machine_perc a string identifying percentage of total final energy flowing into final-to-useful machines. Default is "E.dot_machine \[%\]". -#' @param maximum_values a string identifying the maximum values column in the outgoing template. Default is "Maximum.values". +#' @param eta_fu the name of the final-to-useful efficiency rows in `.eta_fu_template`. Default is IEATools::template_cols$eta_fu. +#' @param e_dot_machine a string identifying energy flow into final-to-useful machines. Default is "Edot_machine". +#' @param e_dot_machine_perc a string identifying percentage of total final energy flowing into final-to-useful machines. Default is "Edot_machine \[%\]". +#' @param maximum_values a string identifying the maximum values column in the outgoing template. Default is `IEATools::template_cols$maximum_values`. 
#' @param header_row_font_color a hex string representing the font color for the header row in the Excel file that is written by this function. #' Default is "#FFFFFF", white. #' @param header_row_shading_color a hex string representing the shading color for the header row in the Excel file that is written by this function. @@ -1132,7 +1132,7 @@ eta_fu_template <- function(.fu_allocations, #' @param blank_shading_color a hex string representing the shading color for blank cells in the `maximum_values` column. #' Default is "#808080". #' @param quantity the name of the quantity column in `.eta_fu_template`. Default is "Quantity". -#' @param e_dot_machine_max_perc the name of the rows that give maximum percentages. Default is "E.dot_machine_max \[%\]". +#' @param e_dot_machine_max_perc the name of the rows that give maximum percentages. Default is "Edot_machine_max \[%\]". #' @param .rownum the name of a temporary column containing row numbers. Default is ".rownum". #' #' @return the `path` argument @@ -1219,7 +1219,7 @@ write_eta_fu_template <- function(.eta_fu_template, # Identify the maximum_values column. 
maximum_values_col_index <- min(year_cols_indices) - 1 - # Add percentage formatting to the E.dot_machine [%] rows + # Add percentage formatting to the Edot_machine [%] rows e_dot_perc_style <- openxlsx::createStyle(numFmt = "PERCENTAGE") openxlsx::addStyle(eta_wb, eta_fu_tab_name, style = e_dot_perc_style, rows = e_dot_machine_perc_row_indices, cols = c(maximum_values_col_index, year_cols_indices), gridExpand = TRUE) diff --git a/R/utilities.R b/R/utilities.R index 2b91312e..2ec4143c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -103,7 +103,7 @@ year_cols <- function(.df, year_pattern = "^-?\\d+$", year = IEATools::iea_cols$ #' @export #' #' @examples -#' DF <- data.frame(E.dot = 2020, a = c(1, 2), `1967` = c(3, 4), `-42` = c(5, 6), check.names = FALSE) +#' DF <- data.frame(Edot = 2020, a = c(1, 2), `1967` = c(3, 4), `-42` = c(5, 6), check.names = FALSE) #' DF %>% meta_cols() #' DF %>% meta_cols(return_names = TRUE) meta_cols <- function(.df, @@ -333,13 +333,13 @@ carnot_efficiency <- function(heat_types, T_0 = 298.15){ #' @param iea_df A data frame containing IEA data. Default is `IEATools::load_tidy_iea_df(file_path)`. #' @param side Refers to the "Consumption" or "Supply" side of Production, Transformation processes, or Energy industry own use. #' One of "Consumption" or "Supply". Default is "Consumption". -#' @param flow_aggregation_point The flow aggregation point column in `iea_df`. Default is "Flow.aggregation.point". +#' @param flow_aggregation_point The flow aggregation point column in `iea_df`. Default is "FlowAggregationPoint". #' @param production The string indicating the production flow. Default is "Production". #' @param transformation_processes The string indicating the transformation process stage. Default is "Transformation processes". #' @param eiou The string indicating the energy industry own use flow. Default is "Energy industry own use". #' @param stage The string indicating the stage for the analysis.
One of `production`, `transformation_processes`, or `eiou`. #' Default is `production`. -#' @param e_dot The energy flow rate column in `iea_df`. Default is "E.dot". +#' @param e_dot The energy flow rate column in `iea_df`. Default is "Edot". #' @param flow The flow column in `iea_df`. Default is "Flow". #' @param product The product column in `iea_df`. Default is "Product". #' @@ -351,15 +351,16 @@ carnot_efficiency <- function(heat_types, T_0 = 298.15){ #' prod_tp_eiou_energy_carriers() prod_tp_eiou_energy_carriers <- function(file_path = sample_iea_data_path(), iea_df = IEATools::load_tidy_iea_df(file_path), - side = c("Consumption", "Supply"), - flow_aggregation_point = "Flow.aggregation.point", - production = "Production", - transformation_processes = "Transformation processes", - eiou = "Energy industry own use", + side = c(IEATools::ledger_sides$consumption, + IEATools::ledger_sides$supply), + flow_aggregation_point = IEATools::iea_cols$flow_aggregation_point, + production = IEATools::tpes_flows$production, + transformation_processes = IEATools::tfc_compare_flows$transformation_processes, + eiou = IEATools::tfc_compare_flows$energy_industry_own_use, stage = c(production, transformation_processes, eiou), - e_dot = "E.dot", - flow = "Flow", - product = "Product"){ + e_dot = IEATools::iea_cols$e_dot, + flow = IEATools::iea_cols$flow, + product = IEATools::iea_cols$product){ stage <- match.arg(stage) side <- match.arg(side) # First step is to focus on Supply or Consumption based on the side argument @@ -505,11 +506,11 @@ sample_eta_fu_table_path <- function(version = 2022) { #' @param col_names a list of column names in IEA data frames. Default is `IEATools::iea_cols`. #' @param country the name of the country column in `.tidy_iea_df`. Default is "Country". #' @param method the name of the method column in `.tidy_iea_df`. Default is "Method". -#' @param energy_type the name of the energy type column in `.tidy_iea_df`. Default is "Energy.type". 
-#' @param last_stage the name of the last stage column in `.tidy_iea_df`. Default is "Last.stage". +#' @param energy_type the name of the energy type column in `.tidy_iea_df`. Default is "EnergyType". +#' @param last_stage the name of the last stage column in `.tidy_iea_df`. Default is "LastStage". #' @param year the name of the year column in `.tidy_iea_df`. Default is "Year". -#' @param ledger_side the name of the ledger side column in `.tidy_iea_df`. Default is "Ledger.side". -#' @param flow_aggregation_point the name of the flow aggregation point column in `.tidy_iea_df`. Default is "Flow.aggregation.point". +#' @param ledger_side the name of the ledger side column in `.tidy_iea_df`. Default is "LedgerSide". +#' @param flow_aggregation_point the name of the flow aggregation point column in `.tidy_iea_df`. Default is "FlowAggregationPoint". #' @param flow the name of the flow column in `.tidy_iea_df`. Default is "Flow". #' @param sep a separator between the flow aggregation point column and the flow column. Used when uniting those two columns internally. Default is "_". #' @param fap_flow the name of the united flow aggregation point and flow column to be created internally in `.tidy_iea_df`. Default is "Flow.aggregation.point_Flow".
@@ -729,4 +730,4 @@ default_aggregation_region_table_path <- function(version = 2019) { } stop("Only 2019, and 2020 are supported in default_aggregation_region_table_path()") } -# EAR - 29/09/2020 \ No newline at end of file +# EAR - 29/09/2020 diff --git a/data-raw/FixIEAData_OLD.R b/data-raw/FixIEAData_OLD.R index c9025cd3..9fbb2389 100644 --- a/data-raw/FixIEAData_OLD.R +++ b/data-raw/FixIEAData_OLD.R @@ -71,7 +71,7 @@ IEAZACoalLiquefactionDataTo2000 <- AllIEAData1 %>% from = c("Hard coal (if no detail)"), to = c("Other bituminous coal")) ) %>% - unite(col = `Flow.aggregation.point+Product`, Flow.aggregation.point, Product, sep = "+") %>% + unite(col = `Flow.aggregation.point+Product`, FlowAggregationPoint, Product, sep = "+") %>% spread(key = `Flow.aggregation.point+Product`, value = E.ktoe) # Need to adjust coal production 1997 and prior for the new information that we'll calculate below @@ -83,9 +83,9 @@ NewCoalProductionRowsTo1977 <- AllIEAData1 %>% spread(key = Product, value = E.ktoe) %>% # Join the CTL gross consumption data to the production data. # Doing so makes accessible the amounts by which we need to adjust Hard coal (if no detail) production. - left_join(IEAZACoalLiquefactionDataTo2000 %>% select(Country, Year, Ledger.side, + left_join(IEAZACoalLiquefactionDataTo2000 %>% select(Country, Year, LedgerSide, `Transformation processes+Other bituminous coal`), - by = c("Country", "Year", "Ledger.side") + by = c("Country", "Year", "LedgerSide") ) %>% mutate( # Reduce Hard coal production by the amount of Other bituminous coal consumption in CTL plants. 
@@ -122,13 +122,13 @@ NewCTLRowsTo2000 <- IEAZACoalLiquefactionDataTo2000 %>% `Energy industry own use+Other hydrocarbons` = -0.5 * `Transformation processes+Other hydrocarbons`, `Transformation processes+Other hydrocarbons` = 0.5 * `Transformation processes+Other hydrocarbons` ) %>% - gather(key = `Flow.aggregation.point+Product`, value = E.ktoe, -Country, -Ledger.side, -Flow, -Year) %>% - separate(col = `Flow.aggregation.point+Product`, into = c("Flow.aggregation.point", "Product"), sep = "[+]") + gather(key = `Flow.aggregation.point+Product`, value = E.ktoe, -Country, -LedgerSide, -Flow, -Year) %>% + separate(col = `Flow.aggregation.point+Product`, into = c("FlowAggregationPoint", "Product"), sep = "[+]") # Second, work on CTL data for 2001 and following IEAZACoalLiquefactionDataFrom2001 <- AllIEAData1 %>% filter(Country == "ZA", Year >= 2001, Flow == "Coal liquefaction plants") %>% - unite(col = `Flow.aggregation.point+Product`, Flow.aggregation.point, Product, sep = "+") %>% + unite(col = `Flow.aggregation.point+Product`, FlowAggregationPoint, Product, sep = "+") %>% spread(key = `Flow.aggregation.point+Product`, value = E.ktoe) # Calculate new information for years 2001 and following @@ -150,8 +150,8 @@ mutate( `Energy industry own use+Other bituminous coal` = `Energy industry own use+Other bituminous coal` - `Energy industry own use+Other hydrocarbons` ) %>% - gather(key = `Flow.aggregation.point+Product`, value = E.ktoe, -Country, -Ledger.side, -Flow, -Year) %>% - separate(col = `Flow.aggregation.point+Product`, into = c("Flow.aggregation.point", "Product"), sep = "[+]") + gather(key = `Flow.aggregation.point+Product`, value = E.ktoe, -Country, -LedgerSide, -Flow, -Year) %>% + separate(col = `Flow.aggregation.point+Product`, into = c("FlowAggregationPoint", "Product"), sep = "[+]") @@ -195,13 +195,13 @@ rm(IEAZACoalLiquefactionDataTo2000, IEAZACoalLiquefactionDataFrom2001, # AllIEAData_prior_to_fix %>% # filter(Country == "ZA" & Flow == "Coal liquefaction
plants") %>% # mutate( -# Flow.aggregation.point = plyr::mapvalues(Flow.aggregation.point, +# FlowAggregationPoint = plyr::mapvalues(FlowAggregationPoint, # from = c("Transformation processes", "Energy industry own use"), # to = c("TP", "EIOU")), # Product = plyr::mapvalues(Product, # from = c("Other bituminous coal", "Hard coal (if no detail)", "Other hydrocarbons"), # to = c("Other bit. coal", "Hard coal", "OHC (output)")), -# agg.product = paste(Flow.aggregation.point, "+", Product) +# agg.product = paste(FlowAggregationPoint, "+", Product) # ) %>% # area_graph_over_under( # y_variable = "E.ktoe", @@ -219,13 +219,13 @@ rm(IEAZACoalLiquefactionDataTo2000, IEAZACoalLiquefactionDataFrom2001, # AllIEAData2 %>% # filter(Country == "ZA" & Flow == "Coal liquefaction plants") %>% # mutate( -# Flow.aggregation.point = plyr::mapvalues(Flow.aggregation.point, +# FlowAggregationPoint = plyr::mapvalues(FlowAggregationPoint, # from = c("Transformation processes", "Energy industry own use"), # to = c("TP", "EIOU")), # Product = plyr::mapvalues(Product, # from = c("Other bituminous coal", "Other hydrocarbons"), # to = c("Other bit. coal", "OHC (output)")), -# agg.product = paste(Flow.aggregation.point, "+", Product) +# agg.product = paste(FlowAggregationPoint, "+", Product) # ) %>% # area_graph_over_under( # y_variable = "E.ktoe", @@ -285,7 +285,7 @@ rm(IEAZACoalLiquefactionDataTo2000, IEAZACoalLiquefactionDataFrom2001, # Data are in ktoe. FixedGHIndustryElectricity <- read.delim(file = file.path("data-raw", "FixedGHIndustryElectricity.tsv"), check.names = FALSE, stringsAsFactors = FALSE) %>% - gather(key = Year, value = E.ktoe, -Country, -Ledger.side, -Flow.aggregation.point, -Flow, -Product ) %>% + gather(key = Year, value = E.ktoe, -Country, -LedgerSide, -FlowAggregationPoint, -Flow, -Product ) %>% filter( # Eliminate rows that have 0 energy. 
E.ktoe != 0 @@ -321,8 +321,8 @@ FixedGHPSB <- read.delim(file = file.path("data-raw", "FixedGHPSB.tsv"), AllIEAData3 <- AllIEAData2 %>% # Remove rows from AllIEAData that are to be replaced by FixedGHIndustryElectricity filter(!(Country == "GH" & - Ledger.side == "Consumption" & - Flow.aggregation.point == "Industry" & + LedgerSide == "Consumption" & + FlowAggregationPoint == "Industry" & Product == "Electricity")) %>% # Replace them bind_rows(FixedGHIndustryElectricity) %>% @@ -337,7 +337,7 @@ AllIEAData3 <- AllIEAData2 %>% # # # Graph of GH industrial electricity prior to fixing it. # AllIEADataPriorToFix %>% -# filter(Country == "GH" & Flow.aggregation.point == "Industry" & Product == "Electricity") %>% +# filter(Country == "GH" & FlowAggregationPoint == "Industry" & Product == "Electricity") %>% # area_graph( # y_variable = "E.ktoe", # ylab = "Primary Energy [ktoe]", @@ -352,7 +352,7 @@ AllIEAData3 <- AllIEAData2 %>% # # # Graph of GH industrial electricity after fixing it. # AllIEAData3 %>% -# filter(Country == "GH" & Flow.aggregation.point == "Industry" & Product == "Electricity") %>% +# filter(Country == "GH" & FlowAggregationPoint == "Industry" & Product == "Electricity") %>% # area_graph( # y_variable = "E.ktoe", # ylab = "Primary Energy [ktoe]", @@ -387,7 +387,7 @@ AllIEAData4 <- AllIEAData3 %>% # Data is inconsistent due to the emergence of non-specified industry in 1998 # and commercial and public services in 2000 !((Country == "HN" & - Ledger.side == "Consumption" & + LedgerSide == "Consumption" & Flow %in% c("Agriculture/forestry", "Commercial and public services", "Non-specified (industry)", @@ -398,7 +398,7 @@ AllIEAData4 <- AllIEAData3 %>% # These rows will be reallocated to smooth out some changes and # redistribute Non-specified (other) (Country == "HN" & - Ledger.side == "Consumption" & + LedgerSide == "Consumption" & Flow %in% c("Agriculture/forestry", "Autoproducer electricity plants", "Commercial and public services", @@ -407,7 +407,7 @@ 
AllIEAData4 <- AllIEAData3 %>% Product == "Gas/diesel oil excl. biofuels") | # Remove Fuel oil rows to eliminate Non-specified (other) (Country == "HN" & - Ledger.side == "Consumption" & + LedgerSide == "Consumption" & Flow %in% c("Agriculture/forestry", "Commercial and public services", "Non-specified (industry)", @@ -434,8 +434,8 @@ AllIEAData4 <- AllIEAData3 %>% IEAStatDiffs <- AllIEAData4 %>% filter(Flow == "Statistical differences") %>% mutate( - Ledger.side = NULL, - Flow.aggregation.point = NULL, + LedgerSide = NULL, + FlowAggregationPoint = NULL, Flow = NULL, Source = "IEA" ) %>% @@ -445,7 +445,7 @@ IEAStatDiffs <- AllIEAData4 %>% MyStatDiffs <- AllIEAData4 %>% filter(!Flow == "Statistical differences") %>% - group_by(Country, Ledger.side, Product, Year) %>% + group_by(Country, LedgerSide, Product, Year) %>% summarise(E.ktoe = sum(E.ktoe)) %>% - spread(key = Ledger.side, value = E.ktoe, fill = 0) %>% + spread(key = LedgerSide, value = E.ktoe, fill = 0) %>% mutate( @@ -460,8 +460,8 @@ NewStatDiffs <- bind_rows(MyStatDiffs, IEAStatDiffs) %>% mutate( # DeltaStatDiffs should be added to the IEA's Statistical differences to perfectly balance the table. DeltaStatDiffs = Actual - IEA, - Ledger.side = "Supply", - Flow.aggregation.point = "TFC compare", + LedgerSide = "Supply", + FlowAggregationPoint = "TFC compare", Flow = "Statistical differences" ) %>% filter(Actual != 0) @@ -474,7 +474,7 @@ AllIEAData5 <- AllIEAData4 %>% # Verify that the new Statistical differences bring all Products into perfect balance.
VerifyStatDiffs <- AllIEAData5 %>% - group_by(Country, Ledger.side, Product, Year) %>% + group_by(Country, LedgerSide, Product, Year) %>% summarise(E.ktoe = sum(E.ktoe)) %>% - spread(key = Ledger.side, value = E.ktoe, fill = 0) %>% + spread(key = LedgerSide, value = E.ktoe, fill = 0) %>% mutate( diff --git a/data-raw/FixedAUSbgf.xlsx b/data-raw/FixedAUSbgf.xlsx index bd886594..7812238c 100644 Binary files a/data-raw/FixedAUSbgf.xlsx and b/data-raw/FixedAUSbgf.xlsx differ diff --git a/data-raw/FixedCOLWRLDElect.xlsx b/data-raw/FixedCOLWRLDElect.xlsx index 88b1f7c3..e2ba4e8b 100644 Binary files a/data-raw/FixedCOLWRLDElect.xlsx and b/data-raw/FixedCOLWRLDElect.xlsx differ diff --git a/data-raw/FixedHNFuels.tsv b/data-raw/FixedHNFuels.tsv index 1f39cdbb..e9b79e29 100644 --- a/data-raw/FixedHNFuels.tsv +++ b/data-raw/FixedHNFuels.tsv @@ -1,15 +1,15 @@ -Country Ledger.side Flow.aggregation.point Flow Product 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 -HN Consumption Other Commercial and public services Liquefied petroleum gases (LPG) 3 3 3.52 3.535802469 3.995555556 4.459259259 4.92691358 4.498765432 4.97037037 4.992098765 4.558024691 4.12 5.057283951 6.464197531 6.028148148 6.519506173 6.547160494 8.453333333 6.602469136 6.15654321 7.133333333 7.162962963 7.192592593 12.51851852 19.25925926 20.7037037 23.11111111 17.81481481 20.7037037 22.62962963 26 28 28 32 36 28 37 34 33 35 35 44 49 -HN Consumption Industry Non-specified (industry) Liquefied petroleum gases (LPG) 0 0 0.96 0.991887125 1.151746032 1.31957672 1.495379189 1.399294533 1.583068783 1.62691358 1.518871252 1.402857143 1.758447972 2.29382716 2.181798942 2.405432099 2.461234568 3.236190476 2.572839506 2.440881834 2.876190476 2.935978836 2.995767196 5.296296296 8.148148148 8.759259259 9.777777778 7.537037037 8.759259259 9.574074074 11 12 16 20 23 27 36 28 20 21 29 37 41 -HN Consumption Other
Residential Liquefied petroleum gases (LPG) 3 3 3.52 3.472310406 3.852698413 4.221164021 4.577707231 4.101940035 4.446560847 4.380987654 3.923104056 3.477142857 4.184268078 5.241975309 4.79005291 5.075061728 4.991604938 6.31047619 4.824691358 4.402574956 4.99047619 4.901058201 4.811640212 8.185185185 12.59259259 13.53703704 15.11111111 11.64814815 13.53703704 14.7962963 17 18 18 14 16 23 29 26 26 28 27 35 38 -HN Consumption Transport Road Liquefied petroleum gases (LPG) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 -HN Consumption Other Agriculture/forestry Gas/diesel oil excl. biofuels 10 11 14 13 14 16 16 18 19 19 18 18 18 17 8 1 1 1 2 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -HN Consumption Other Commercial and public services Gas/diesel oil excl. biofuels 8 9 11 10 11 14 23 27 28 27 23 22 19 20 17 17 20 22 24 22 21 23 25 29 28 29 30 32 50 23 53 93 137 43 42 32 4 5 1 1 0 0 0 -HN Consumption Industry Non-specified (industry) Gas/diesel oil excl. 
biofuels 33 38 44 43 46 52 66 75 79 80 67 72 92 109 100 73 98 108 118 107 102 111 124 144 138 143 147 159 156 140 237 219 249 286 278 262 127 172 196 190 185 200 197 -HN Consumption Other Agriculture/forestry Fuel oil 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -HN Consumption Other Commercial and public services Fuel oil 1 1 1 1 2 2 2 2 2 2 3 2 1 1 1 1 3 4 6 5 6 6 5 4 4 6 3 4 8 0 1 8 16 21 22 25 6 0 0 0 0 0 0 -HN Consumption Industry Non-specified (industry) Fuel oil 56 61 63 75 82 77 72 76 92 89 77 61 70 80 73 78 83 106 135 130 134 133 115 101 97 144 76 85 106 78 72 78 122 165 173 248 289 249 201 205 222 467 380 -HN Supply Total primary energy supply Imports - Coke oven coke Coke oven coke 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 54 42 44 70 77 92 92 92 89 -HN Supply Total primary energy supply Stock changes - Coke oven coke Coke oven coke 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 0 -22 0 -14 14 14 14 -HN Consumption Industry Blast furnaces Coke oven coke 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -HN Consumption Industry Non-specified (industry) Coke oven coke 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 49 44 48 77 78 106 106 103 +Country LedgerSide FlowAggregationPoint Flow Product 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 +HN Consumption Other Commercial and public services Liquefied petroleum gases (LPG) 3 3 3.52 3.535802469 3.995555556 4.459259259 4.92691358 4.498765432 4.97037037 4.992098765 4.558024691 4.12 5.057283951 6.464197531 6.028148148 6.519506173 6.547160494 8.453333333 6.602469136 6.15654321 7.133333333 7.162962963 7.192592593 12.51851852 19.25925926 20.7037037 23.11111111 17.81481481 20.7037037 22.62962963 26 
28 28 32 36 28 37 34 33 35 35 44 49 +HN Consumption Industry Non-specified (industry) Liquefied petroleum gases (LPG) 0 0 0.96 0.991887125 1.151746032 1.31957672 1.495379189 1.399294533 1.583068783 1.62691358 1.518871252 1.402857143 1.758447972 2.29382716 2.181798942 2.405432099 2.461234568 3.236190476 2.572839506 2.440881834 2.876190476 2.935978836 2.995767196 5.296296296 8.148148148 8.759259259 9.777777778 7.537037037 8.759259259 9.574074074 11 12 16 20 23 27 36 28 20 21 29 37 41 +HN Consumption Other Residential Liquefied petroleum gases (LPG) 3 3 3.52 3.472310406 3.852698413 4.221164021 4.577707231 4.101940035 4.446560847 4.380987654 3.923104056 3.477142857 4.184268078 5.241975309 4.79005291 5.075061728 4.991604938 6.31047619 4.824691358 4.402574956 4.99047619 4.901058201 4.811640212 8.185185185 12.59259259 13.53703704 15.11111111 11.64814815 13.53703704 14.7962963 17 18 18 14 16 23 29 26 26 28 27 35 38 +HN Consumption Transport Road Liquefied petroleum gases (LPG) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 +HN Consumption Other Agriculture/forestry Gas/diesel oil excl. biofuels 10 11 14 13 14 16 16 18 19 19 18 18 18 17 8 1 1 1 2 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +HN Consumption Other Commercial and public services Gas/diesel oil excl. biofuels 8 9 11 10 11 14 23 27 28 27 23 22 19 20 17 17 20 22 24 22 21 23 25 29 28 29 30 32 50 23 53 93 137 43 42 32 4 5 1 1 0 0 0 +HN Consumption Industry Non-specified (industry) Gas/diesel oil excl. 
biofuels 33 38 44 43 46 52 66 75 79 80 67 72 92 109 100 73 98 108 118 107 102 111 124 144 138 143 147 159 156 140 237 219 249 286 278 262 127 172 196 190 185 200 197 +HN Consumption Other Agriculture/forestry Fuel oil 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +HN Consumption Other Commercial and public services Fuel oil 1 1 1 1 2 2 2 2 2 2 3 2 1 1 1 1 3 4 6 5 6 6 5 4 4 6 3 4 8 0 1 8 16 21 22 25 6 0 0 0 0 0 0 +HN Consumption Industry Non-specified (industry) Fuel oil 56 61 63 75 82 77 72 76 92 89 77 61 70 80 73 78 83 106 135 130 134 133 115 101 97 144 76 85 106 78 72 78 122 165 173 248 289 249 201 205 222 467 380 +HN Supply Total primary energy supply Imports - Coke oven coke Coke oven coke 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 54 42 44 70 77 92 92 92 89 +HN Supply Total primary energy supply Stock changes - Coke oven coke Coke oven coke 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 0 -22 0 -14 14 14 14 +HN Consumption Industry Blast furnaces Coke oven coke 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +HN Consumption Industry Non-specified (industry) Coke oven coke 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 49 44 48 77 78 106 106 103 diff --git a/data-raw/FixedOAMRCharcoalProductionPlants.xlsx b/data-raw/FixedOAMRCharcoalProductionPlants.xlsx index d83c4625..b9fe2c22 100644 Binary files a/data-raw/FixedOAMRCharcoalProductionPlants.xlsx and b/data-raw/FixedOAMRCharcoalProductionPlants.xlsx differ diff --git a/data-raw/FixedOAMRGasWorks.xlsx b/data-raw/FixedOAMRGasWorks.xlsx index 779fad80..ec83f4ee 100644 Binary files a/data-raw/FixedOAMRGasWorks.xlsx and b/data-raw/FixedOAMRGasWorks.xlsx differ diff --git a/data-raw/FixedRUSESTHeat19901993.xlsx b/data-raw/FixedRUSESTHeat19901993.xlsx index 0afaa51b..6078d819 100644 Binary files a/data-raw/FixedRUSESTHeat19901993.xlsx and 
b/data-raw/FixedRUSESTHeat19901993.xlsx differ diff --git a/data-raw/GHA-IndustryElectricity.xlsx b/data-raw/GHA-IndustryElectricity.xlsx index 293c6c81..bbbc8284 100644 Binary files a/data-raw/GHA-IndustryElectricity.xlsx and b/data-raw/GHA-IndustryElectricity.xlsx differ diff --git a/data-raw/GHA-PSB.xlsx b/data-raw/GHA-PSB.xlsx index 838be733..139c429f 100644 Binary files a/data-raw/GHA-PSB.xlsx and b/data-raw/GHA-PSB.xlsx differ diff --git a/data-raw/PreprocessIEAData.R b/data-raw/PreprocessIEAData.R index bb18efc6..9a44da6e 100644 --- a/data-raw/PreprocessIEAData.R +++ b/data-raw/PreprocessIEAData.R @@ -97,7 +97,7 @@ IEAData1 <- lapply( # Manipulate raw data # IEAData2 <- IEAData1 %>% - gather(Year, E.ktoe, -c(Country, Ledger.side, Flow, Flow.aggregation.point, Product)) + gather(Year, E.ktoe, -c(Country, LedgerSide, Flow, FlowAggregationPoint, Product)) IEAData3 <- IEAData2 %>% # Remove missing values to reduce memory footprint @@ -159,7 +159,7 @@ AllIEAData <- IEAData4 %>% # Orders rows same as the original downloaded IEA data. # Orders the columns in a reasonable manner # Also, drops the Name of the country. 
- select(Country, Ledger.side, Flow.aggregation.point, Flow, Product, Year, E.ktoe) + select(Country, LedgerSide, FlowAggregationPoint, Flow, Product, Year, E.ktoe) # # Clean up the environment diff --git a/data-raw/create_constants.R b/data-raw/create_constants.R index 6b2c25b7..feae9503 100644 --- a/data-raw/create_constants.R +++ b/data-raw/create_constants.R @@ -20,15 +20,15 @@ usethis::use_data(valid_iea_release_years, overwrite = TRUE) iea_cols <- list(country = "Country", method = "Method", - energy_type = "Energy.type", - last_stage = "Last.stage", + energy_type = "EnergyType", + last_stage = "LastStage", year = "Year", - ledger_side = "Ledger.side", - flow_aggregation_point = "Flow.aggregation.point", + ledger_side = "LedgerSide", + flow_aggregation_point = "FlowAggregationPoint", flow = "Flow", product = "Product", unit = "Unit", - e_dot = "E.dot") + e_dot = "Edot") usethis::use_data(iea_cols, overwrite = TRUE) @@ -116,30 +116,30 @@ usethis::use_data(psut_cols, overwrite = TRUE) # Give names of columns in FU allocation and eta_fu templates # -template_cols <- list(ef_product = "Ef.product", +template_cols <- list(ef_product = "EfProduct", machine = "Machine", - eu_product = "Eu.product", + eu_product = "EuProduct", destination = "Destination", quantity = "Quantity", - maximum_values = "Maximum.values", + maximum_values = "MaximumValues", C_eiou = "C_EIOU", C_Y = "C_Y", C_perc = "C.perc [%]", - e_dot_max = "E.dot_max", - e_dot_perc = "E.dot [%]", - e_dot_dest = "E.dot_dest", - e_dot_machine = "E.dot_machine", - e_dot_machine_tot = "E.dot_machine_tot", - e_dot_machine_perc = "E.dot_machine [%]", - e_dot_machine_max_perc = "E.dot_machine_max [%]", - eta_fu = "eta.fu", - phi_pf = "phi.pf", - phi_u = "phi.u", + e_dot_max = paste0(IEATools::iea_cols$e_dot, "_max"), + e_dot_perc = paste0(IEATools::iea_cols$e_dot, " [%]"), + e_dot_dest = paste0(IEATools::iea_cols$e_dot, "_dest"), + e_dot_machine = paste0(IEATools::iea_cols$e_dot, "_machine"), + e_dot_machine_tot 
= paste0(IEATools::iea_cols$e_dot, "_machine", "_tot"), + e_dot_machine_perc = paste0(IEATools::iea_cols$e_dot, "_machine", " [%]"), + e_dot_machine_max_perc = paste0(IEATools::iea_cols$e_dot, "_machine", "_max [%]"), + eta_fu = "etafu", + phi_pf = "phipf", + phi_u = "phiu", phi = "phi", - c_source = "C.source", - eta_fu_source = "eta.fu.source", - phi_source = "phi.source", - .values = ".values") + c_source = "CSource", + eta_fu_source = "etafuSource", + phi_source = "PhiSource", + .values = "Value") usethis::use_data(template_cols, overwrite = TRUE) @@ -161,7 +161,8 @@ row_col_types <- list(industry = "Industry", resource = "Industry", sector = "Industry", product = "Product", - unit = "Unit") + unit = "Unit", + other = "Other") usethis::use_data(row_col_types, overwrite = TRUE) @@ -307,6 +308,23 @@ usethis::use_data(biofuels_and_waste_products, overwrite = TRUE) electricity_products <- list(electricity = "Electricity") usethis::use_data(electricity_products, overwrite = TRUE) + +# +# Heat +# + +heat_products <- list(heat = "Heat") +usethis::use_data(heat_products, overwrite = TRUE) + + +# +# Nuclear products +# + +nuclear_products <- list(nuclear = "Nuclear") +usethis::use_data(nuclear_products, overwrite = TRUE) + + # # Non-energy # @@ -582,11 +600,11 @@ usethis::use_data(aggregation_flows, overwrite = TRUE) # # Default names for columns in aggregate data frames # -aggregate_cols <- list(aggregate_primary = "EX.p", - aggregate_final = "EX.f", - aggregate_useful = "EX.u", - net_aggregate_demand = "EX.fd_net", - gross_aggregate_demand = "EX.fd_gross") +aggregate_cols <- list(aggregate_primary = "EXp", + aggregate_final = "EXf", + aggregate_useful = "EXu", + net_aggregate_demand = "EXfdnet", + gross_aggregate_demand = "EXfdgross") usethis::use_data(aggregate_cols, overwrite = TRUE) @@ -715,8 +733,8 @@ usethis::use_data(ledger_sides, overwrite = TRUE) # In the first step, we use the data frame created from load_tidy_iea_df, # creating a united column from 
Flow.aggregation.point and Flow. fap_flows <- load_tidy_iea_df(remove_zeroes = FALSE) %>% - tidyr::unite(col = Flow.aggregation.point_Flow, Flow.aggregation.point, Flow, sep = "_", remove = TRUE) %>% - dplyr::select(Flow.aggregation.point_Flow) %>% + tidyr::unite(col = FlowAggregationPoint_Flow, FlowAggregationPoint, Flow, sep = "_", remove = TRUE) %>% + dplyr::select(FlowAggregationPoint_Flow) %>% unique() %>% unlist() %>% unname() %>% @@ -826,8 +844,51 @@ usethis::use_data(fd_sectors, overwrite = TRUE) phi_constants_names <- list(phi_constants_tab_name = "phi_constants", product_colname = "Product", phi_colname = "phi", - phi_source_colname = "phi.source", - is_useful_colname = "is.useful") + phi_source_colname = "PhiSource", + is_useful_colname = "IsUseful") usethis::use_data(phi_constants_names, overwrite = TRUE) +# +# Renewable energy industries +# + +renewable_industries <- list(geothermal_plants = "Geothermal plants", + hydro_plants = "Hydropower plants", + solar_pv_plants = "Solar photovoltaic plants", + solar_th_plants = "Solar thermal plants", + oceanic_plants = "Oceanic power plants", + wind_power_plants = "Wind power plants") +usethis::use_data(renewable_industries, overwrite = TRUE) + + +# +# Grid industries +# + +grid_industries <- list(electricity_grid = "Electricity grid") + +usethis::use_data(grid_industries, overwrite = TRUE) + + +# +# Distribution industries +# + +distribution_industry <- "Distribution" + +usethis::use_data(distribution_industry, overwrite = TRUE) + + + +# +# Electricity and heat output rows +# + +elec_heat_output <- list(electricity_output_prefix = "Electricity output (GWh)-", + heat_output_prefix = "Heat output-", + output_machine_delimiter = "-", + input_product = "InputProduct", + output_product = "OutputProduct") + +usethis::use_data(elec_heat_output, overwrite = TRUE) diff --git a/data-raw/prep_IEA_country_files.R b/data-raw/prep_IEA_country_files.R new file mode 100644 index 00000000..ee62746e --- /dev/null +++ 
b/data-raw/prep_IEA_country_files.R @@ -0,0 +1,61 @@ +# This script reads an IEA data file and splits it into +# several data files, one for each country. +# This process should normally be done only once. +# MKH --2024-04-01 + +iea_year <- 2022 + +onedrive_root <- file.path("~", + "OneDrive - University of Leeds", + "Fellowship 1960-2015 PFU database research") + +# Set the folder into which all country files will be saved, +# which is nearly the same name as the IEA data file below. +country_folder <- file.path(onedrive_root, + "IEA extended energy balance data", + paste("IEA", iea_year, "energy balance data"), + paste("IEA Extended Energy Balances", iea_year, "(TJ)")) +# The path to the IEA data +iea_file <- paste0(country_folder, ".csv") + + +# Read the country concordance file +country_concordance_file <- file.path(onedrive_root, + "InputData", + "v2.0", + "Country_Concordance_Full.xlsx") +country_concordance_tab <- "country_concordance_table" +country_concordance <- country_concordance_file |> + readxl::read_excel(sheet = country_concordance_tab) |> + dplyr::select(IEA.name, PFU.code) + +# Read the IEA data file +iea_df <- iea_file |> + IEATools::slurp_iea_to_raw_df() + +known_countries <- iea_df |> + dplyr::left_join(country_concordance, by = dplyr::join_by(COUNTRY == IEA.name)) |> + dplyr::filter(!is.na(PFU.code)) + +# List the UN countries we don't pick up +print("Countries not picked up:") +setdiff(unique(iea_df$COUNTRY), unique(known_countries$COUNTRY)) + +# Save the country files +known_countries |> + dplyr::group_by(PFU.code) |> + dplyr::group_walk(.f = function(this_grp, this_key) { + pfu_code <- this_key$PFU.code[[1]] + country_file_path <- file.path(country_folder, + paste0("IEA Extended Energy Balances ", + iea_year, + " (TJ) ", + pfu_code, + ".csv")) + this_grp |> + dplyr::mutate( + PFU.code = NULL + ) |> + write.csv(file = country_file_path, row.names = FALSE) + }) + diff --git a/data/Fixed_AUS_bfg.rda b/data/Fixed_AUS_bfg.rda index 
44eb3163..f8a60e74 100644 Binary files a/data/Fixed_AUS_bfg.rda and b/data/Fixed_AUS_bfg.rda differ diff --git a/data/Fixed_COL_WRLD_Electricity.rda b/data/Fixed_COL_WRLD_Electricity.rda index 99060ec7..87b8474e 100644 Binary files a/data/Fixed_COL_WRLD_Electricity.rda and b/data/Fixed_COL_WRLD_Electricity.rda differ diff --git a/data/Fixed_GHA_Industry_Electricity.rda b/data/Fixed_GHA_Industry_Electricity.rda index d6942eab..630ca225 100644 Binary files a/data/Fixed_GHA_Industry_Electricity.rda and b/data/Fixed_GHA_Industry_Electricity.rda differ diff --git a/data/Fixed_GHA_PSB.rda b/data/Fixed_GHA_PSB.rda index cdf4ed9e..1bec6a64 100644 Binary files a/data/Fixed_GHA_PSB.rda and b/data/Fixed_GHA_PSB.rda differ diff --git a/data/Fixed_OAMR_Cpp.rda b/data/Fixed_OAMR_Cpp.rda index 4278e040..1e97c690 100644 Binary files a/data/Fixed_OAMR_Cpp.rda and b/data/Fixed_OAMR_Cpp.rda differ diff --git a/data/Fixed_OAMR_gw.rda b/data/Fixed_OAMR_gw.rda index e6d3f3c6..c5fba419 100644 Binary files a/data/Fixed_OAMR_gw.rda and b/data/Fixed_OAMR_gw.rda differ diff --git a/data/Fixed_RUSEST_heat.rda b/data/Fixed_RUSEST_heat.rda index c81ae944..ebd1798a 100644 Binary files a/data/Fixed_RUSEST_heat.rda and b/data/Fixed_RUSEST_heat.rda differ diff --git a/data/aggregate_cols.rda b/data/aggregate_cols.rda index bc0a219b..3e75546e 100644 Binary files a/data/aggregate_cols.rda and b/data/aggregate_cols.rda differ diff --git a/data/aggregation_flows.rda b/data/aggregation_flows.rda index 3330a843..cae9d51f 100644 Binary files a/data/aggregation_flows.rda and b/data/aggregation_flows.rda differ diff --git a/data/aggregation_regions.rda b/data/aggregation_regions.rda index fe4c57fa..86c824a2 100644 Binary files a/data/aggregation_regions.rda and b/data/aggregation_regions.rda differ diff --git a/data/all_stages.rda b/data/all_stages.rda index dad37c1d..f0d1de56 100644 Binary files a/data/all_stages.rda and b/data/all_stages.rda differ diff --git a/data/biofuels_and_waste_products.rda 
b/data/biofuels_and_waste_products.rda index 4398bc03..be90e6fd 100644 Binary files a/data/biofuels_and_waste_products.rda and b/data/biofuels_and_waste_products.rda differ diff --git a/data/coal_and_coal_products.rda b/data/coal_and_coal_products.rda index 86fde04b..e741b5ea 100644 Binary files a/data/coal_and_coal_products.rda and b/data/coal_and_coal_products.rda differ diff --git a/data/countries.rda b/data/countries.rda index bab795e7..e516d6bf 100644 Binary files a/data/countries.rda and b/data/countries.rda differ diff --git a/data/country_concordance_cols.rda b/data/country_concordance_cols.rda index b9f65b63..3c9b004a 100644 Binary files a/data/country_concordance_cols.rda and b/data/country_concordance_cols.rda differ diff --git a/data/distribution_industry.rda b/data/distribution_industry.rda new file mode 100644 index 00000000..4fdc391d Binary files /dev/null and b/data/distribution_industry.rda differ diff --git a/data/eiou_flows.rda b/data/eiou_flows.rda index 09d394ca..bac750ac 100644 Binary files a/data/eiou_flows.rda and b/data/eiou_flows.rda differ diff --git a/data/elec_heat_output.rda b/data/elec_heat_output.rda new file mode 100644 index 00000000..3ca1b6da Binary files /dev/null and b/data/elec_heat_output.rda differ diff --git a/data/electricity_products.rda b/data/electricity_products.rda index 0f31886c..2b8c70ba 100644 Binary files a/data/electricity_products.rda and b/data/electricity_products.rda differ diff --git a/data/energy_types.rda b/data/energy_types.rda index c6d56455..edf3ec39 100644 Binary files a/data/energy_types.rda and b/data/energy_types.rda differ diff --git a/data/fap_flows.rda b/data/fap_flows.rda index d18d2a82..472285d3 100644 Binary files a/data/fap_flows.rda and b/data/fap_flows.rda differ diff --git a/data/fd_sectors.rda b/data/fd_sectors.rda index d9adac82..a9efc3d8 100644 Binary files a/data/fd_sectors.rda and b/data/fd_sectors.rda differ diff --git a/data/fu_analysis_file_info.rda b/data/fu_analysis_file_info.rda 
index 86b52338..3b531c96 100644 Binary files a/data/fu_analysis_file_info.rda and b/data/fu_analysis_file_info.rda differ diff --git a/data/grid_industries.rda b/data/grid_industries.rda new file mode 100644 index 00000000..06772e8d Binary files /dev/null and b/data/grid_industries.rda differ diff --git a/data/heat_products.rda b/data/heat_products.rda new file mode 100644 index 00000000..9056481f Binary files /dev/null and b/data/heat_products.rda differ diff --git a/data/iea_cols.rda b/data/iea_cols.rda index 47f663ae..96739197 100644 Binary files a/data/iea_cols.rda and b/data/iea_cols.rda differ diff --git a/data/industry_flows.rda b/data/industry_flows.rda index 852db0ec..786b1371 100644 Binary files a/data/industry_flows.rda and b/data/industry_flows.rda differ diff --git a/data/industry_net_flows.rda b/data/industry_net_flows.rda index b699d07e..e8f5a4fc 100644 Binary files a/data/industry_net_flows.rda and b/data/industry_net_flows.rda differ diff --git a/data/interface_industries.rda b/data/interface_industries.rda index afd2fbb6..73c26239 100644 Binary files a/data/interface_industries.rda and b/data/interface_industries.rda differ diff --git a/data/last_stages.rda b/data/last_stages.rda index 411cfcc0..8f9feec7 100644 Binary files a/data/last_stages.rda and b/data/last_stages.rda differ diff --git a/data/ledger_sides.rda b/data/ledger_sides.rda index 821fdc55..ce830026 100644 Binary files a/data/ledger_sides.rda and b/data/ledger_sides.rda differ diff --git a/data/main_act_plants.rda b/data/main_act_plants.rda index e511743e..e4372089 100644 Binary files a/data/main_act_plants.rda and b/data/main_act_plants.rda differ diff --git a/data/manufacturing_flows.rda b/data/manufacturing_flows.rda index 2dd487f9..04d34971 100644 Binary files a/data/manufacturing_flows.rda and b/data/manufacturing_flows.rda differ diff --git a/data/mat_meta_cols.rda b/data/mat_meta_cols.rda index 49d74ddc..f9b31df9 100644 Binary files a/data/mat_meta_cols.rda and 
b/data/mat_meta_cols.rda differ diff --git a/data/memo_aggregation_flow_prefixes.rda b/data/memo_aggregation_flow_prefixes.rda index 67d35691..75cc997d 100644 Binary files a/data/memo_aggregation_flow_prefixes.rda and b/data/memo_aggregation_flow_prefixes.rda differ diff --git a/data/memo_aggregation_product_prefixes.rda b/data/memo_aggregation_product_prefixes.rda index b75cf675..c4e2a03f 100644 Binary files a/data/memo_aggregation_product_prefixes.rda and b/data/memo_aggregation_product_prefixes.rda differ diff --git a/data/memo_non_energy_flows.rda b/data/memo_non_energy_flows.rda index d25526a5..41a62fae 100644 Binary files a/data/memo_non_energy_flows.rda and b/data/memo_non_energy_flows.rda differ diff --git a/data/methods.rda b/data/methods.rda index b9d1f1e6..0807849a 100644 Binary files a/data/methods.rda and b/data/methods.rda differ diff --git a/data/non_energy_flows.rda b/data/non_energy_flows.rda index 80c88200..52bc7d1f 100644 Binary files a/data/non_energy_flows.rda and b/data/non_energy_flows.rda differ diff --git a/data/non_specified_flows.rda b/data/non_specified_flows.rda index d1488085..74ef5b8f 100644 Binary files a/data/non_specified_flows.rda and b/data/non_specified_flows.rda differ diff --git a/data/nonenergy_products.rda b/data/nonenergy_products.rda index f638bee0..c16a32a1 100644 Binary files a/data/nonenergy_products.rda and b/data/nonenergy_products.rda differ diff --git a/data/nuclear_products.rda b/data/nuclear_products.rda new file mode 100644 index 00000000..aa11af0e Binary files /dev/null and b/data/nuclear_products.rda differ diff --git a/data/oil_and_oil_products.rda b/data/oil_and_oil_products.rda index bd8d39b3..b5387473 100644 Binary files a/data/oil_and_oil_products.rda and b/data/oil_and_oil_products.rda differ diff --git a/data/other_flows.rda b/data/other_flows.rda index a2198a7d..7f01b430 100644 Binary files a/data/other_flows.rda and b/data/other_flows.rda differ diff --git a/data/override_iso_codes_df.rda 
b/data/override_iso_codes_df.rda index f5237d11..890aff7a 100644 Binary files a/data/override_iso_codes_df.rda and b/data/override_iso_codes_df.rda differ diff --git a/data/peat_and_peat_products.rda b/data/peat_and_peat_products.rda index 59c49304..08fd5ff4 100644 Binary files a/data/peat_and_peat_products.rda and b/data/peat_and_peat_products.rda differ diff --git a/data/phi_constants_names.rda b/data/phi_constants_names.rda index ce1c93ed..0de98eb7 100644 Binary files a/data/phi_constants_names.rda and b/data/phi_constants_names.rda differ diff --git a/data/prim_agg_flows.rda b/data/prim_agg_flows.rda index 0bc0e61d..9bec0073 100644 Binary files a/data/prim_agg_flows.rda and b/data/prim_agg_flows.rda differ diff --git a/data/primary_coal_products.rda b/data/primary_coal_products.rda index 5cc16c5f..c884aa3e 100644 Binary files a/data/primary_coal_products.rda and b/data/primary_coal_products.rda differ diff --git a/data/primary_gas_products.rda b/data/primary_gas_products.rda index 4cd4094d..885d7a58 100644 Binary files a/data/primary_gas_products.rda and b/data/primary_gas_products.rda differ diff --git a/data/primary_oil_products.rda b/data/primary_oil_products.rda index 17486e4d..70fbf9d2 100644 Binary files a/data/primary_oil_products.rda and b/data/primary_oil_products.rda differ diff --git a/data/primary_peat_products.rda b/data/primary_peat_products.rda index facf5c9a..4d8913c2 100644 Binary files a/data/primary_peat_products.rda and b/data/primary_peat_products.rda differ diff --git a/data/products.rda b/data/products.rda index f0e8b280..637fca11 100644 Binary files a/data/products.rda and b/data/products.rda differ diff --git a/data/psut_cols.rda b/data/psut_cols.rda index 8872e963..3aa44848 100644 Binary files a/data/psut_cols.rda and b/data/psut_cols.rda differ diff --git a/data/renewable_industries.rda b/data/renewable_industries.rda new file mode 100644 index 00000000..0298d85d Binary files /dev/null and b/data/renewable_industries.rda differ diff 
--git a/data/renewable_products.rda b/data/renewable_products.rda index bdc46452..e946bc2d 100644 Binary files a/data/renewable_products.rda and b/data/renewable_products.rda differ diff --git a/data/row_col_types.rda b/data/row_col_types.rda index e58bc4ca..86019fb3 100644 Binary files a/data/row_col_types.rda and b/data/row_col_types.rda differ diff --git a/data/secondary_coal_products.rda b/data/secondary_coal_products.rda index 48814efe..dc42c09d 100644 Binary files a/data/secondary_coal_products.rda and b/data/secondary_coal_products.rda differ diff --git a/data/secondary_oil_products.rda b/data/secondary_oil_products.rda index 4e354a4a..296c7c4d 100644 Binary files a/data/secondary_oil_products.rda and b/data/secondary_oil_products.rda differ diff --git a/data/secondary_peat_products.rda b/data/secondary_peat_products.rda index 85089e84..855da8d9 100644 Binary files a/data/secondary_peat_products.rda and b/data/secondary_peat_products.rda differ diff --git a/data/sut_meta_cols.rda b/data/sut_meta_cols.rda index a5123e01..ef30a56c 100644 Binary files a/data/sut_meta_cols.rda and b/data/sut_meta_cols.rda differ diff --git a/data/template_cols.rda b/data/template_cols.rda index c2ec6d6a..4238fe34 100644 Binary files a/data/template_cols.rda and b/data/template_cols.rda differ diff --git a/data/tfc_compare_flows.rda b/data/tfc_compare_flows.rda index 99ba9eea..a845a0bb 100644 Binary files a/data/tfc_compare_flows.rda and b/data/tfc_compare_flows.rda differ diff --git a/data/tfc_flows.rda b/data/tfc_flows.rda index 23895cee..a47f4ca9 100644 Binary files a/data/tfc_flows.rda and b/data/tfc_flows.rda differ diff --git a/data/tpes_flows.rda b/data/tpes_flows.rda index 7ab487fd..2e624bb9 100644 Binary files a/data/tpes_flows.rda and b/data/tpes_flows.rda differ diff --git a/data/transformation_processes.rda b/data/transformation_processes.rda index 0b5a50d2..c5ea02c6 100644 Binary files a/data/transformation_processes.rda and b/data/transformation_processes.rda differ 
diff --git a/data/transport_domestic_flows.rda b/data/transport_domestic_flows.rda index 8a35ea52..1d14619a 100644 Binary files a/data/transport_domestic_flows.rda and b/data/transport_domestic_flows.rda differ diff --git a/data/transport_flows.rda b/data/transport_flows.rda index 8d916f78..a4161ffe 100644 Binary files a/data/transport_flows.rda and b/data/transport_flows.rda differ diff --git a/data/valid_iea_release_years.rda b/data/valid_iea_release_years.rda index 003c792c..7bba483f 100644 Binary files a/data/valid_iea_release_years.rda and b/data/valid_iea_release_years.rda differ diff --git a/docs/404.html b/docs/404.html index ed96ce62..2af6ec84 100644 --- a/docs/404.html +++ b/docs/404.html @@ -8,65 +8,56 @@ Page not found (404) • IEATools - - - - + + + Skip to contents - -