Skip to content

Commit

Permalink
Merge pull request #144 from microsoft/mitokic/08262023/diff-boxcox
Browse files Browse the repository at this point in the history
Mitokic/08262023/diff boxcox
  • Loading branch information
mitokic authored Sep 12, 2023
2 parents 029ae74 + 08dfac8 commit 5125e54
Show file tree
Hide file tree
Showing 14 changed files with 596 additions and 75 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: finnts
Title: Microsoft Finance Time Series Forecasting Framework
Version: 0.3.0.9002
Version: 0.3.0.9003
Authors@R:
c(person(given = "Mike",
family = "Tokic",
Expand Down Expand Up @@ -33,6 +33,7 @@ Imports:
doParallel,
dplyr,
earth,
feasts,
foreach,
fs,
generics,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(get_prepped_data)
export(get_prepped_models)
export(get_run_info)
export(get_trained_models)
export(list_models)
export(prep_data)
export(prep_models)
export(set_run_info)
Expand Down
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# finnts 0.3.0.9002 (DEVELOPMENT VERSION)
# finnts 0.3.0.9003 (DEVELOPMENT VERSION)

## Improvements

- Tidymodels speed up
- Added external regressor support for ARIMA by introducing a new model option of `arimax`, which uses engineered features in addition to any external regressors supplied.
- Automated feature selection; refer to the feature selection vignette for more details
- Error handling in hierarchical forecast reconciliation
- Box-Cox and differencing transformations
- Added new function, `list_models()`, that lists available models in the package

## Bug Fixes

Expand Down
4 changes: 2 additions & 2 deletions R/ensemble_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,14 @@ ensemble_models <- function(run_info,
}

# get ensemble models to run
ensemble_model_list <- c("cubist", "glmnet", "svm-poly", "svm-rbf", "xgboost")
ensemble_model_list <- list_ensemble_models()

if (is.na(models_to_run) & is.na(models_not_to_run)) {
# do nothing, using existing ml_models list
} else if (is.na(models_to_run) & !is.na(models_not_to_run)) {
ensemble_model_list <- setdiff(ensemble_model_list, stringr::str_split(models_not_to_run, "---")[[1]])
} else {
ensemble_model_list <- ensemble_model_list[c("cubist", "glmnet", "svm-poly", "svm-rbf", "xgboost") %in% stringr::str_split(models_to_run, "---")[[1]]]
ensemble_model_list <- ensemble_model_list[list_ensemble_models() %in% stringr::str_split(models_to_run, "---")[[1]]]
}

# parallel run info
Expand Down
34 changes: 24 additions & 10 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,30 @@ select_features <- function(input_data,
dplyr::select(Feature, Vote, Auto_Accept)

# cubist feature importance
vip_cubist_results <- vip_cubist_fn(
input_data,
seed
) %>%
dplyr::rename(Feature = Variable) %>%
dplyr::mutate(
Vote = 1,
Auto_Accept = 0
) %>%
dplyr::select(Feature, Vote, Auto_Accept)
vip_cubist_results <- tryCatch(
{
vip_cubist_fn(
input_data,
seed
) %>%
dplyr::rename(Feature = Variable) %>%
dplyr::mutate(
Vote = 1,
Auto_Accept = 0
) %>%
dplyr::select(Feature, Vote, Auto_Accept)
},
warning = function(w) {
# do nothing
},
error = function(e) {
tibble::tibble()
}
)

if (is.null(vip_cubist_results)) {
votes_needed <- votes_needed - 1
}

# lasso regression feature importance
vip_lm_initial <- vip_lm_fn(
Expand Down
113 changes: 95 additions & 18 deletions R/models.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,81 @@
#' List all available models
#'
#' Returns the names of every model shipped with the package, as a
#' character vector.
#'
#' @return character vector of model names
#' @export
list_models <- function() {
  # The vector is the last expression, so it is the return value; no local
  # variable is used, which avoids masking base::list().
  c(
    "arima",
    "arima-boost",
    "arimax",
    "cubist",
    "croston",
    "ets",
    "glmnet",
    "mars",
    "meanf",
    "nnetar",
    "nnetar-xregs",
    "prophet",
    "prophet-boost",
    "prophet-xregs",
    "snaive",
    "stlm-arima",
    "stlm-ets",
    "svm-poly",
    "svm-rbf",
    "tbats",
    "theta",
    "xgboost"
  )
}

#' List models with hyperparameters
#'
#' Returns the model names as a character vector.
#'
#' @return character vector of model names
#' @noRd
list_hyperparmater_models <- function() {
  # Returned directly as the last expression; no local named `list`, so
  # base::list() is not masked.
  c(
    "arima-boost",
    "cubist",
    "glmnet",
    "mars",
    "nnetar",
    "nnetar-xregs",
    "prophet",
    "prophet-boost",
    "prophet-xregs",
    "svm-poly",
    "svm-rbf",
    "xgboost"
  )
}

#' List ensemble models
#'
#' Returns the model names as a character vector.
#'
#' @return character vector of model names
#' @noRd
list_ensemble_models <- function() {
  ensemble_names <- c("cubist", "glmnet", "svm-poly", "svm-rbf", "xgboost")

  ensemble_names
}

#' List models compatible with the R2 recipe
#'
#' Returns the model names as a character vector.
#'
#' @return character vector of model names
#' @noRd
list_r2_models <- function() {
  c(
    "cubist",
    "glmnet",
    "svm-poly",
    "svm-rbf",
    "xgboost"
  )
}

#' List global models
#'
#' Returns the model names as a character vector.
#'
#' @return character vector of model names
#' @noRd
list_global_models <- function() {
  global_names <- c(
    "cubist", "glmnet", "mars",
    "svm-poly", "svm-rbf", "xgboost"
  )

  global_names
}

#' List multivariate models
#'
#' Returns every name from `list_global_models()` followed by four further
#' model names, as a character vector.
#'
#' @return character vector of model names
#' @noRd
list_multivariate_models <- function() {
  # Models appended after the global ones; order matches the original vector.
  additional_models <- c(
    "arima-boost",
    "prophet-boost",
    "prophet-xregs",
    "nnetar-xregs"
  )

  c(list_global_models(), additional_models)
}

#' Gets a simple recipe
#'
#' @param train_data Training Data
Expand Down Expand Up @@ -34,17 +112,17 @@ get_recipe_combo <- function(train_data) {
#' @noRd

get_recipe_configurable <- function(train_data,
mutate_adj_half = FALSE,
rm_date = "plain",
step_nzv = "zv",
norm_date_adj_year = FALSE,
dummy_one_hot = TRUE,
character_factor = FALSE,
center_scale = FALSE,
one_hot = FALSE,
pca = TRUE,
corr = FALSE,
lincomb = FALSE) {
mutate_adj_half = FALSE,
rm_date = "plain",
step_nzv = "zv",
norm_date_adj_year = FALSE,
dummy_one_hot = TRUE,
character_factor = FALSE,
center_scale = FALSE,
one_hot = FALSE,
pca = TRUE,
corr = FALSE,
lincomb = FALSE) {
mutate_adj_half_fn <- function(df) {
if (mutate_adj_half) {
df %>%
Expand All @@ -68,7 +146,7 @@ get_recipe_configurable <- function(train_data,
"none" = df
)
}

corr_fn <- function(df) {
if (corr) {
df %>%
Expand Down Expand Up @@ -137,7 +215,7 @@ get_recipe_configurable <- function(train_data,
rm_lincomb_fn <- function(df) {
if (lincomb) {
df %>%
recipes::step_lincomb(recipes::all_numeric_predictors(), id = "remove_linear_combs")
recipes::step_lincomb(recipes::all_numeric_predictors(), id = "remove_linear_combs")
} else {
df
}
Expand Down Expand Up @@ -413,9 +491,8 @@ arima <- function(train_data,
#' @return Get the ARIMAX based model
#' @noRd
arimax <- function(train_data,
frequency,
pca) {

frequency,
pca) {
recipe_spec_arimax <- train_data %>%
get_recipe_configurable(
step_nzv = "zv",
Expand All @@ -428,12 +505,12 @@ arimax <- function(train_data,
seasonal_period = frequency
) %>%
parsnip::set_engine("auto_arima")

wflw_spec <- get_workflow_simple(
model_spec_arima,
recipe_spec_arimax
)

return(wflw_spec)
}

Expand Down
Loading

0 comments on commit 5125e54

Please sign in to comment.