code formatting

microsoft · Jul 25, 2024 · bc88b7b · bc88b7b
1 parent 31f60ba
commit bc88b7b
Show file tree

Hide file tree

Showing 32 changed files with 396 additions and 444 deletions.
diff --git a/R/ensemble_models.R b/R/ensemble_models.R
@@ -181,27 +181,29 @@ ensemble_models <- function(run_info,
       # model forecasts
       single_model_tbl <- NULL
       if (run_local_models) {
-        suppressWarnings(try(single_model_tbl <- read_file(run_info,
-          path = paste0(
-            "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
-            "-", combo, "-single_models.", run_info$data_output
+        suppressWarnings(try(
+          single_model_tbl <- read_file(run_info,
+            path = paste0(
+              "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
+              "-", combo, "-single_models.", run_info$data_output
+            ),
+            return_type = "df"
           ),
-          return_type = "df"
-        ),
-        silent = TRUE
+          silent = TRUE
         ))
       }
 
       global_model_tbl <- NULL
       if (run_global_models) {
-        suppressWarnings(try(global_model_tbl <- read_file(run_info,
-          path = paste0(
-            "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
-            "-", combo, "-global_models.", run_info$data_output
+        suppressWarnings(try(
+          global_model_tbl <- read_file(run_info,
+            path = paste0(
+              "/forecasts/", hash_data(run_info$experiment_name), "-", hash_data(run_info$run_name),
+              "-", combo, "-global_models.", run_info$data_output
+            ),
+            return_type = "df"
           ),
-          return_type = "df"
-        ),
-        silent = TRUE
+          silent = TRUE
         ))
       }
 
@@ -336,7 +338,6 @@ ensemble_models <- function(run_info,
         .multicombine = TRUE,
         .noexport = NULL
       ) %do% {
-
         # get initial run info
         model <- model_run %>%
           dplyr::pull(Model_Name)

diff --git a/R/feature_selection.R b/R/feature_selection.R
@@ -23,10 +23,8 @@ run_feature_selection <- function(input_data,
                                   forecast_horizon,
                                   external_regressors,
                                   multistep_horizon = FALSE) {
-
   # check for more than one unique target value
   if (input_data %>% tidyr::drop_na(Target) %>% dplyr::pull(Target) %>% unique() %>% length() < 2) {
-
     # just return the date features
     fs_list <- input_data %>%
       dplyr::select(tidyselect::contains("Date"))
@@ -83,7 +81,6 @@ run_feature_selection <- function(input_data,
 
     # run feature selection
     if (date_type %in% c("day", "week")) {
-
       # number of votes needed for feature to be selected
       votes_needed <- 3
 
@@ -410,7 +407,6 @@ lofo_fn <- function(run_info,
                     parallel_processing,
                     pca = FALSE,
                     seed = 123) {
-
   # parallel run info
   par_info <- par_start(
     run_info = run_info,

diff --git a/R/final_models.R b/R/final_models.R
@@ -141,7 +141,6 @@ final_models <- function(run_info,
   run_ensemble_models <- prev_log_df$run_ensemble_models
 
   if (sum(colnames(prev_log_df) %in% "weighted_mape")) {
-
     # check if input values have changed
     current_log_df <- tibble::tibble(
       average_models = average_models,
@@ -294,7 +293,6 @@ final_models <- function(run_info,
 
       # simple model averaging
       if (average_models & length(final_model_list) > 1) {
-
         # create model combinations list
         model_combinations <- tibble::tibble()
 
@@ -338,7 +336,6 @@ final_models <- function(run_info,
           .noexport = NULL
         ) %op%
           {
-
             # get list of models to average
             model_list <- strsplit(x, "_")[[1]]
 
@@ -362,17 +359,17 @@ final_models <- function(run_info,
       } else {
         averages_tbl <- NULL
       }
-      
+
       # choose best average model
-      if(!is.null(averages_tbl)) {
+      if (!is.null(averages_tbl)) {
         avg_back_test_mape <- averages_tbl %>%
           dplyr::mutate(
             Train_Test_ID = as.numeric(Train_Test_ID),
             Target = ifelse(Target == 0, 0.1, Target)
           ) %>%
           dplyr::filter(Train_Test_ID != 1) %>%
           dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4))
-        
+
         avg_best_model_mape <- avg_back_test_mape %>%
           dplyr::group_by(Model_ID, Combo) %>%
           dplyr::mutate(
@@ -385,7 +382,7 @@ final_models <- function(run_info,
           dplyr::group_by(Combo) %>%
           dplyr::slice(1) %>%
           dplyr::ungroup()
-        
+
         avg_best_model_tbl <- avg_best_model_mape %>%
           dplyr::select(Combo, Model_ID)
       }
@@ -525,11 +522,11 @@ final_models <- function(run_info,
             by = "Model_ID"
           ) %>%
           dplyr::mutate(Best_Model = ifelse(!is.na(Best_Model), "Yes", "No"))
-        
-        if(!is.null(averages_tbl)) {
+
+        if (!is.null(averages_tbl)) {
           avg_model_final_tbl <- averages_tbl %>%
             dplyr::right_join(avg_best_model_tbl,
-                              by = c("Combo", "Model_ID")
+              by = c("Combo", "Model_ID")
             ) %>%
             dplyr::mutate(
               Combo_ID = Combo,
@@ -619,14 +616,16 @@ final_models <- function(run_info,
 
   # clean up any parallel run process
   par_end(cl)
-  
+
   # condense outputs into less files for larger runs
-  if(length(combo_list) > 10000) {
+  if (length(combo_list) > 10000) {
     cli::cli_progress_step("Condensing Forecasts")
-
-    condense_data(run_info, 
-                  parallel_processing,
-                  num_cores)
+
+    condense_data(
+      run_info,
+      parallel_processing,
+      num_cores
+    )
   }
 
   # reconcile hierarchical forecasts
@@ -641,17 +640,21 @@ final_models <- function(run_info,
       num_cores
     )
   }
-  
+
   # calculate weighted mape
   weighted_mape <- get_forecast_data(run_info = run_info) %>%
-    dplyr::filter(Run_Type == "Back_Test", 
-                  Best_Model == "Yes") %>%
+    dplyr::filter(
+      Run_Type == "Back_Test",
+      Best_Model == "Yes"
+    ) %>%
     dplyr::mutate(
       Target = ifelse(Target == 0, 0.1, Target)
     ) %>%
-    dplyr::mutate(MAPE = round(abs((Forecast - Target) / Target), digits = 4), 
-                  Total = sum(Target, na.rm = TRUE), 
-                  Weight = (MAPE*Target)/Total) %>%
+    dplyr::mutate(
+      MAPE = round(abs((Forecast - Target) / Target), digits = 4),
+      Total = sum(Target, na.rm = TRUE),
+      Weight = (MAPE * Target) / Total
+    ) %>%
     dplyr::pull(Weight) %>%
     sum() %>%
     round(digits = 4)

diff --git a/R/forecast_time_series.R b/R/forecast_time_series.R
@@ -323,7 +323,6 @@ forecast_backwards_compatibility <- function(run_info,
       dplyr::select(Combo, Model, Best_Model) %>%
       dplyr::distinct()
   } else {
-
     # read in unreconciled results
     best_model_tbl <- read_file(run_info,
       path = paste0(

diff --git a/R/hierarchy.R b/R/hierarchy.R
@@ -82,7 +82,6 @@ prep_hierarchical_data <- function(input_data,
         hierarchical_tbl <- hierarchical_tbl %>%
           dplyr::left_join(temp_tbl, by = c("Date"))
       } else if (value_level != "All") {
-
         # agg by lowest level
         bottom_tbl <- input_data_adj %>%
           tidyr::unite("Combo",
@@ -400,7 +399,6 @@ reconcile_hierarchical_data <- function(run_info,
                                         forecast_approach,
                                         negative_forecast = FALSE,
                                         num_cores) {
-
   # get run splits
   model_train_test_tbl <- read_file(run_info,
     path = paste0(
@@ -420,15 +418,15 @@ reconcile_hierarchical_data <- function(run_info,
   hts_nodes <- hts_list$nodes
   original_combo_list <- hts_list$original_combos
   hts_combo_list <- hts_list$hts_combos
-  
+
   # check if data has been condensed
   cond_path <- paste0(
     run_info$path, "/forecasts/*", hash_data(run_info$experiment_name), "-",
     hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
   )
-  
+
   condensed_files <- list_files(run_info$storage_object, fs::path(cond_path))
-  
+
   if (length(condensed_files) > 0) {
     condensed <- TRUE
   } else {
@@ -444,7 +442,7 @@ reconcile_hierarchical_data <- function(run_info,
     return_type <- "df"
   }
 
-  if(condensed) {
+  if (condensed) {
     fcst_path <- paste0(
       "/forecasts/*", hash_data(run_info$experiment_name), "-",
       hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
@@ -453,7 +451,7 @@ reconcile_hierarchical_data <- function(run_info,
     fcst_path <- paste0(
       "/forecasts/*", hash_data(run_info$experiment_name), "-",
       hash_data(run_info$run_name), "*models", ".", run_info$data_output
-    ) 
+    )
   }
 
   unreconciled_tbl <- read_file(run_info,
@@ -889,7 +887,6 @@ reconcile_hierarchical_data <- function(run_info,
 external_regressor_mapping <- function(data,
                                        combo_variables,
                                        external_regressors) {
-
   # create var combinations list
   var_combinations <- tibble::tibble()
 
@@ -918,7 +915,6 @@ external_regressor_mapping <- function(data,
     .multicombine = TRUE,
     .noexport = NULL
   ) %do% {
-
     # get unique values of regressor per combo variable iteration
     var_unique_tbl <- foreach::foreach(
       var = iter_list,
@@ -1000,7 +996,6 @@ sum_hts_data <- function(bottom_level_tbl,
                          forecast_approach,
                          frequency_number,
                          return_type = "data") {
-
   # create aggregations for target variable
   Date <- bottom_level_tbl$Date
 

diff --git a/R/input_checks.R b/R/input_checks.R
@@ -1,4 +1,3 @@
-
 #' Check input values
 #'
 #' @param input_name input name
@@ -13,21 +12,23 @@ check_input_type <- function(input_name,
                              type,
                              expected_value = NULL) {
   if (!inherits(input_value, type)) {
-    stop(paste0(
-      "invalid type for input name '", input_name, "', needs to be of type ",
-      glue::glue_collapse(type, " or ")
-    ),
-    call. = FALSE
+    stop(
+      paste0(
+        "invalid type for input name '", input_name, "', needs to be of type ",
+        glue::glue_collapse(type, " or ")
+      ),
+      call. = FALSE
     )
   }
 
   if (!is.null(expected_value) & !is.null(input_value)) {
     if (!sum(input_value %in% expected_value)) {
-      stop(paste0(
-        "invalid value for input name '", input_name, "', value needs to equal ",
-        glue::glue_collapse(expected_value, " or ")
-      ),
-      call. = FALSE
+      stop(
+        paste0(
+          "invalid value for input name '", input_name, "', value needs to equal ",
+          glue::glue_collapse(expected_value, " or ")
+        ),
+        call. = FALSE
       )
     }
   }
@@ -52,7 +53,6 @@ check_input_data <- function(input_data,
                              date_type,
                              fiscal_year_start,
                              parallel_processing) {
-
   # data combo names match the input data
   if (sum(combo_variables %in% colnames(input_data)) != length(combo_variables)) {
     stop("combo variables do not match column headers in input data")
@@ -103,7 +103,6 @@ check_input_data <- function(input_data,
 
   # input_data is correct type for parallel processing
   if (inherits(input_data, c("data.frame", "tbl")) & is.null(parallel_processing)) {
-
     # do nothing
   } else if (inherits(input_data, "tbl_spark") & is.null(parallel_processing)) {
     stop("spark data frames should run with spark parallel processing",
@@ -148,7 +147,6 @@ check_input_data <- function(input_data,
 check_parallel_processing <- function(run_info,
                                       parallel_processing,
                                       inner_parallel = FALSE) {
-
   # parallel processing formatting
   if (is.null(parallel_processing)) {
     return()

diff --git a/R/models.R b/R/models.R
@@ -701,7 +701,6 @@ glmnet <- function(train_data,
                    horizon,
                    external_regressors,
                    frequency) {
-
   # create model recipe and spec
   if (multistep) {
     recipe_spec_glmnet <- train_data %>%
@@ -1328,7 +1327,6 @@ xgboost <- function(train_data,
                     horizon,
                     external_regressors,
                     frequency) {
-
   # create model recipe and spec
   if (multistep) {
     recipe_spec_xgboost <- train_data %>%

diff --git a/R/multistep_cubist.R b/R/multistep_cubist.R
@@ -1,4 +1,3 @@
-
 # CUBIST Multistep ----
 
 #' Initialize custom cubist parsnip model
@@ -298,7 +297,6 @@ cubist_multistep_fit_impl <- function(x, y,
                                       external_regressors = NULL,
                                       forecast_horizon = NULL,
                                       selected_features = NULL) {
-
   # X & Y
   # Expect outcomes  = vector
   # Expect predictor = data.frame
@@ -321,7 +319,6 @@ cubist_multistep_fit_impl <- function(x, y,
   model_predictions <- list()
 
   for (lag in get_multi_lags(lag_periods, forecast_horizon)) {
-
     # get final features based on lag
     xreg_tbl_final <- multi_feature_selection(
       xreg_tbl,
@@ -438,7 +435,6 @@ predict.cubist_multistep_fit_impl <- function(object, new_data, ...) {
 #' @keywords internal
 #' @export
 cubist_multistep_predict_impl <- function(object, new_data, ...) {
-
   # Date Mapping Table
   date_tbl <- new_data %>%
     dplyr::select(Date, Date_index.num) %>%