Skip to content

Commit

Permalink
code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
mitokic committed Jul 25, 2024
1 parent 31f60ba commit bc88b7b
Show file tree
Hide file tree
Showing 32 changed files with 396 additions and 444 deletions.
31 changes: 16 additions & 15 deletions R/ensemble_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,27 +181,29 @@ ensemble_models <- function(run_info,
# model forecasts
single_model_tbl <- NULL
if (run_local_models) {
suppressWarnings(try(single_model_tbl <- read_file(run_info,
path = paste0(
"/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
"-", combo, "-single_models.", run_info$data_output
suppressWarnings(try(
single_model_tbl <- read_file(run_info,
path = paste0(
"/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
"-", combo, "-single_models.", run_info$data_output
),
return_type = "df"
),
return_type = "df"
),
silent = TRUE
silent = TRUE
))
}

global_model_tbl <- NULL
if (run_global_models) {
suppressWarnings(try(global_model_tbl <- read_file(run_info,
path = paste0(
"/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
"-", combo, "-global_models.", run_info$data_output
suppressWarnings(try(
global_model_tbl <- read_file(run_info,
path = paste0(
"/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
"-", combo, "-global_models.", run_info$data_output
),
return_type = "df"
),
return_type = "df"
),
silent = TRUE
silent = TRUE
))
}

Expand Down Expand Up @@ -336,7 +338,6 @@ ensemble_models <- function(run_info,
.multicombine = TRUE,
.noexport = NULL
) %do% {

# get initial run info
model <- model_run %>%
dplyr::pull(Model_Name)
Expand Down
4 changes: 0 additions & 4 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ run_feature_selection <- function(input_data,
forecast_horizon,
external_regressors,
multistep_horizon = FALSE) {

# check for more than one unique target value
if (input_data %>% tidyr::drop_na(Target) %>% dplyr::pull(Target) %>% unique() %>% length() < 2) {

# just return the date features
fs_list <- input_data %>%
dplyr::select(tidyselect::contains("Date"))
Expand Down Expand Up @@ -83,7 +81,6 @@ run_feature_selection <- function(input_data,

# run feature selection
if (date_type %in% c("day", "week")) {

# number of votes needed for feature to be selected
votes_needed <- 3

Expand Down Expand Up @@ -410,7 +407,6 @@ lofo_fn <- function(run_info,
parallel_processing,
pca = FALSE,
seed = 123) {

# parallel run info
par_info <- par_start(
run_info = run_info,
Expand Down
47 changes: 25 additions & 22 deletions R/final_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ final_models <- function(run_info,
run_ensemble_models <- prev_log_df$run_ensemble_models

if (sum(colnames(prev_log_df) %in% "weighted_mape")) {

# check if input values have changed
current_log_df <- tibble::tibble(
average_models = average_models,
Expand Down Expand Up @@ -294,7 +293,6 @@ final_models <- function(run_info,

# simple model averaging
if (average_models & length(final_model_list) > 1) {

# create model combinations list
model_combinations <- tibble::tibble()

Expand Down Expand Up @@ -338,7 +336,6 @@ final_models <- function(run_info,
.noexport = NULL
) %op%
{

# get list of models to average
model_list <- strsplit(x, "_")[[1]]

Expand All @@ -362,17 +359,17 @@ final_models <- function(run_info,
} else {
averages_tbl <- NULL
}

# choose best average model
if(!is.null(averages_tbl)) {
if (!is.null(averages_tbl)) {
avg_back_test_mape <- averages_tbl %>%
dplyr::mutate(
Train_Test_ID = as.numeric(Train_Test_ID),
Target = ifelse(Target == 0, 0.1, Target)
) %>%
dplyr::filter(Train_Test_ID != 1) %>%
dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4))

avg_best_model_mape <- avg_back_test_mape %>%
dplyr::group_by(Model_ID, Combo) %>%
dplyr::mutate(
Expand All @@ -385,7 +382,7 @@ final_models <- function(run_info,
dplyr::group_by(Combo) %>%
dplyr::slice(1) %>%
dplyr::ungroup()

avg_best_model_tbl <- avg_best_model_mape %>%
dplyr::select(Combo, Model_ID)
}
Expand Down Expand Up @@ -525,11 +522,11 @@ final_models <- function(run_info,
by = "Model_ID"
) %>%
dplyr::mutate(Best_Model = ifelse(!is.na(Best_Model), "Yes", "No"))
if(!is.null(averages_tbl)) {

if (!is.null(averages_tbl)) {
avg_model_final_tbl <- averages_tbl %>%
dplyr::right_join(avg_best_model_tbl,
by = c("Combo", "Model_ID")
by = c("Combo", "Model_ID")
) %>%
dplyr::mutate(
Combo_ID = Combo,
Expand Down Expand Up @@ -619,14 +616,16 @@ final_models <- function(run_info,

# clean up any parallel run process
par_end(cl)

# condense outputs into less files for larger runs
if(length(combo_list) > 10000) {
if (length(combo_list) > 10000) {
cli::cli_progress_step("Condensing Forecasts")

condense_data(run_info,
parallel_processing,
num_cores)

condense_data(
run_info,
parallel_processing,
num_cores
)
}

# reconcile hierarchical forecasts
Expand All @@ -641,17 +640,21 @@ final_models <- function(run_info,
num_cores
)
}

# calculate weighted mape
weighted_mape <- get_forecast_data(run_info = run_info) %>%
dplyr::filter(Run_Type == "Back_Test",
Best_Model == "Yes") %>%
dplyr::filter(
Run_Type == "Back_Test",
Best_Model == "Yes"
) %>%
dplyr::mutate(
Target = ifelse(Target == 0, 0.1, Target)
) %>%
dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4),
Total = sum(Target, na.rm = TRUE),
Weight = (MAPE*Target)/Total) %>%
dplyr::mutate(
MAPE = round(abs((Forecast - Target) / Target), digits = 4),
Total = sum(Target, na.rm = TRUE),
Weight = (MAPE * Target) / Total
) %>%
dplyr::pull(Weight) %>%
sum() %>%
round(digits = 4)
Expand Down
1 change: 0 additions & 1 deletion R/forecast_time_series.R
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ forecast_backwards_compatibility <- function(run_info,
dplyr::select(Combo, Model, Best_Model) %>%
dplyr::distinct()
} else {

# read in unreconciled results
best_model_tbl <- read_file(run_info,
path = paste0(
Expand Down
15 changes: 5 additions & 10 deletions R/hierarchy.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ prep_hierarchical_data <- function(input_data,
hierarchical_tbl <- hierarchical_tbl %>%
dplyr::left_join(temp_tbl, by = c("Date"))
} else if (value_level != "All") {

# agg by lowest level
bottom_tbl <- input_data_adj %>%
tidyr::unite("Combo",
Expand Down Expand Up @@ -400,7 +399,6 @@ reconcile_hierarchical_data <- function(run_info,
forecast_approach,
negative_forecast = FALSE,
num_cores) {

# get run splits
model_train_test_tbl <- read_file(run_info,
path = paste0(
Expand All @@ -420,15 +418,15 @@ reconcile_hierarchical_data <- function(run_info,
hts_nodes <- hts_list$nodes
original_combo_list <- hts_list$original_combos
hts_combo_list <- hts_list$hts_combos

# check if data has been condensed
cond_path <- paste0(
run_info$path, "/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
)

condensed_files <- list_files(run_info$storage_object, fs::path(cond_path))

if (length(condensed_files) > 0) {
condensed <- TRUE
} else {
Expand All @@ -444,7 +442,7 @@ reconcile_hierarchical_data <- function(run_info,
return_type <- "df"
}

if(condensed) {
if (condensed) {
fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
Expand All @@ -453,7 +451,7 @@ reconcile_hierarchical_data <- function(run_info,
fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*models", ".", run_info$data_output
)
)
}

unreconciled_tbl <- read_file(run_info,
Expand Down Expand Up @@ -889,7 +887,6 @@ reconcile_hierarchical_data <- function(run_info,
external_regressor_mapping <- function(data,
combo_variables,
external_regressors) {

# create var combinations list
var_combinations <- tibble::tibble()

Expand Down Expand Up @@ -918,7 +915,6 @@ external_regressor_mapping <- function(data,
.multicombine = TRUE,
.noexport = NULL
) %do% {

# get unique values of regressor per combo variable iteration
var_unique_tbl <- foreach::foreach(
var = iter_list,
Expand Down Expand Up @@ -1000,7 +996,6 @@ sum_hts_data <- function(bottom_level_tbl,
forecast_approach,
frequency_number,
return_type = "data") {

# create aggregations for target variable
Date <- bottom_level_tbl$Date

Expand Down
26 changes: 12 additions & 14 deletions R/input_checks.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' Check input values
#'
#' @param input_name input name
Expand All @@ -13,21 +12,23 @@ check_input_type <- function(input_name,
type,
expected_value = NULL) {
if (!inherits(input_value, type)) {
stop(paste0(
"invalid type for input name '", input_name, "', needs to be of type ",
glue::glue_collapse(type, " or ")
),
call. = FALSE
stop(
paste0(
"invalid type for input name '", input_name, "', needs to be of type ",
glue::glue_collapse(type, " or ")
),
call. = FALSE
)
}

if (!is.null(expected_value) & !is.null(input_value)) {
if (!sum(input_value %in% expected_value)) {
stop(paste0(
"invalid value for input name '", input_name, "', value needs to equal ",
glue::glue_collapse(expected_value, " or ")
),
call. = FALSE
stop(
paste0(
"invalid value for input name '", input_name, "', value needs to equal ",
glue::glue_collapse(expected_value, " or ")
),
call. = FALSE
)
}
}
Expand All @@ -52,7 +53,6 @@ check_input_data <- function(input_data,
date_type,
fiscal_year_start,
parallel_processing) {

# data combo names match the input data
if (sum(combo_variables %in% colnames(input_data)) != length(combo_variables)) {
stop("combo variables do not match column headers in input data")
Expand Down Expand Up @@ -103,7 +103,6 @@ check_input_data <- function(input_data,

# input_data is correct type for parallel processing
if (inherits(input_data, c("data.frame", "tbl")) & is.null(parallel_processing)) {

# do nothing
} else if (inherits(input_data, "tbl_spark") & is.null(parallel_processing)) {
stop("spark data frames should run with spark parallel processing",
Expand Down Expand Up @@ -148,7 +147,6 @@ check_input_data <- function(input_data,
check_parallel_processing <- function(run_info,
parallel_processing,
inner_parallel = FALSE) {

# parallel processing formatting
if (is.null(parallel_processing)) {
return()
Expand Down
2 changes: 0 additions & 2 deletions R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,6 @@ glmnet <- function(train_data,
horizon,
external_regressors,
frequency) {

# create model recipe and spec
if (multistep) {
recipe_spec_glmnet <- train_data %>%
Expand Down Expand Up @@ -1328,7 +1327,6 @@ xgboost <- function(train_data,
horizon,
external_regressors,
frequency) {

# create model recipe and spec
if (multistep) {
recipe_spec_xgboost <- train_data %>%
Expand Down
4 changes: 0 additions & 4 deletions R/multistep_cubist.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# CUBIST Multistep ----

#' Initialize custom cubist parsnip model
Expand Down Expand Up @@ -298,7 +297,6 @@ cubist_multistep_fit_impl <- function(x, y,
external_regressors = NULL,
forecast_horizon = NULL,
selected_features = NULL) {

# X & Y
# Expect outcomes = vector
# Expect predictor = data.frame
Expand All @@ -321,7 +319,6 @@ cubist_multistep_fit_impl <- function(x, y,
model_predictions <- list()

for (lag in get_multi_lags(lag_periods, forecast_horizon)) {

# get final features based on lag
xreg_tbl_final <- multi_feature_selection(
xreg_tbl,
Expand Down Expand Up @@ -438,7 +435,6 @@ predict.cubist_multistep_fit_impl <- function(object, new_data, ...) {
#' @keywords internal
#' @export
cubist_multistep_predict_impl <- function(object, new_data, ...) {

# Date Mapping Table
date_tbl <- new_data %>%
dplyr::select(Date, Date_index.num) %>%
Expand Down
Loading

0 comments on commit bc88b7b

Please sign in to comment.