
final push before PR
mitokic committed Nov 18, 2021
1 parent 0c7a314 commit 9fb7c2e
Showing 14 changed files with 118 additions and 72 deletions.
5 changes: 5 additions & 0 deletions .Rbuildignore
@@ -5,3 +5,8 @@
^docs$
^pkgdown$
^\.github$
+ CODE_OF_CONDUCT.md
+ SECURITY.md
+ SUPPORT.md
+ cran-comments.md
+ NEWS.md
12 changes: 6 additions & 6 deletions DESCRIPTION
@@ -1,11 +1,11 @@
Package: finnts
Title: Microsoft Finance Time Series Forecasting Framework
- Version: 0.0.0.9000
+ Version: 0.1.0
Authors@R:
c(person(given = "Mike",
family = "Tokic",
role = c("aut", "cre"),
- email = "mitokic@microsoft.com",
+ email = "mftokic@gmail.com",
comment = c(ORCID = "0000-0002-7630-7055")),
person(given = "Aadharsh",
family = "Kannan",
@@ -34,15 +34,17 @@ Imports:
doParallel,
dplyr,
earth,
+ foreach,
generics,
glmnet,
gtools,
hts,
kernlab,
lightgbm,
+ lubridate,
magrittr,
matrixcalc,
methods,
modeltime.ensemble,
modeltime.gluonts,
modeltime.resample,
@@ -74,8 +76,6 @@ Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Depends:
    R (>= 3.6.0),
- lubridate,
- foreach,
    modeltime
VignetteBuilder: knitr
5 changes: 4 additions & 1 deletion NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

- export("%>%")
export(arima)
export(arima_boost)
export(croston)
@@ -26,6 +25,10 @@ export(tabnet)
export(tbats)
export(theta)
export(xgboost)
+ import(modeltime)
+ importFrom(foreach,"%do%")
+ importFrom(foreach,"%dopar%")
+ importFrom(lubridate,"%m+%")
importFrom(magrittr,"%>%")
importFrom(methods,formalArgs)
importFrom(stats,sd)
4 changes: 4 additions & 0 deletions NEWS.md
@@ -0,0 +1,4 @@

# finnts 0.1.0

* Initial CRAN Release
2 changes: 1 addition & 1 deletion R/azure_batch_parallel.R
@@ -22,7 +22,7 @@ get_fcast_parallel_azure <- function(combo_list,

cli::cli_h2("Submitting Tasks to Azure Batch")

- fcst <- foreach(i = combo_list, .combine = 'rbind',
+ fcst <- foreach::foreach(i = combo_list, .combine = 'rbind',
.packages = get_export_packages(),
.export = get_transfer_functions(),
.options.azure = list(maxTaskRetryCount = 0,
10 changes: 5 additions & 5 deletions R/forecast_models.R
@@ -332,7 +332,7 @@ construct_forecast_models <- function(full_data_tbl,
combined_models_recipe_2 <- modeltime::modeltime_table()

# parallel processing
- if(run_model_parallel==TRUE & sum(parallel_processing!="local_machine") == 1) {
+ if(run_model_parallel == TRUE & sum(parallel_processing == "local_machine") == 0) {
parallel_args <- init_parallel_within(parallel_processing, num_cores)
}

@@ -390,7 +390,7 @@ construct_forecast_models <- function(full_data_tbl,
try(combined_models_recipe_1 <- modeltime::add_modeltime_model(combined_models_recipe_1,
mdl_called,
location = "top") %>%
- update_model_description(1, model_name),
+ modeltime::update_model_description(1, model_name),
silent = TRUE)

}else{
@@ -422,7 +422,7 @@ construct_forecast_models <- function(full_data_tbl,
try(combined_models_recipe_1 <- modeltime::add_modeltime_model(combined_models_recipe_1,
mdl_called,
location = "top") %>%
- update_model_description(1, add_name),
+ modeltime::update_model_description(1, add_name),
silent = TRUE)

}
@@ -446,7 +446,7 @@ construct_forecast_models <- function(full_data_tbl,
try(combined_models_recipe_2 <- modeltime::add_modeltime_model(combined_models_recipe_2,
mdl_called,
location = "top") %>%
- update_model_description(1, add_name),
+ modeltime::update_model_description(1, add_name),
silent = TRUE)
}

@@ -651,7 +651,7 @@ construct_forecast_models <- function(full_data_tbl,
try(combined_ensemble_models <- modeltime::add_modeltime_model(combined_ensemble_models,
mdl_ensemble,
location = "top") %>%
- update_model_description(1, add_name),
+ modeltime::update_model_description(1, add_name),
silent = TRUE)
}

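For context, a minimal sketch of the modeltime-table pattern these hunks touch, with every call namespace-qualified — package code cannot assume modeltime is attached, which is the motivation for the fully qualified update_model_description() calls above. The data and ARIMA fit follow modeltime's getting-started example; none of this is finnts code.

library(magrittr)

# Illustrative data and model, borrowed from modeltime's getting-started docs
m750 <- timetk::m4_monthly %>% dplyr::filter(id == "M750")

fit_arima <- modeltime::arima_reg() %>%
  parsnip::set_engine("auto_arima") %>%
  parsnip::fit(value ~ date, data = m750)

# Start an empty table, add the model at the top, then relabel model id 1,
# mirroring the construct_forecast_models() flow patched above
models_tbl <- modeltime::modeltime_table()

models_tbl <- modeltime::add_modeltime_model(models_tbl, fit_arima,
                                             location = "top") %>%
  modeltime::update_model_description(1, "arima--custom-label")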
57 changes: 33 additions & 24 deletions R/forecast_time_series.R
@@ -19,10 +19,10 @@
#' that specified date. Default of NULL is to not remove any time series and attempt to forecast all of them.
#' @param fiscal_year_start Month number of start of fiscal year of input data, aids in building out date features.
#' Formatted as a numeric value. Default of 1 assumes fiscal year starts in January.
- #' @param clean_missing_values Should missing values be inputted? Only inputes values for missing data within an
+ #' @param clean_missing_values If TRUE, cleans missing values. Only imputes values for missing data within an
#' existing series, and does not add new values onto the beginning or end, but does provide a value of 0 for said
#' values.
- #' @param clean_outliers Should outliers be cleaned and inputted with values more in line with historical data?
+ #' @param clean_outliers If TRUE, outliers are cleaned and imputed with values more in line with historical data.
#' @param back_test_scenarios Number of specific back test folds to run when determining the best model.
#' Default of NULL will automatically choose the number of back tests to run based on historical data size,
#' which tries to always use a minimum of 80% of the data when training a model.
@@ -36,16 +36,16 @@
#' @param parallel_processing Default of NULL runs no parallel processing and forecasts each individual time series
#' one after another. 'local_machine' leverages all cores on current machine Finn is running on. 'azure_batch'
#' runs time series in parallel on a remote compute cluster in Azure Batch.
- #' @param run_model_parallel Run model training in parallel, only works when parallel_processing is set to
+ #' @param run_model_parallel If TRUE, runs model training in parallel; only works when parallel_processing is set to
#' 'local_machine' or 'azure_batch'.
#' @param num_cores Number of cores to run when parallel processing is set up. Used when running parallel computations
#' on local machine or within Azure. Default of NULL uses total amount of cores on machine minus one. Can't be greater
#' than number of cores on machine minus 1.
#' @param azure_batch_credentials Credentials to run parallel_processing in Azure Batch.
#' @param azure_batch_cluster_config Compute cluster specification to run parallel_processing in Azure Batch.
- #' @param azure_batch_cluster_delete Delete the Azure Batch compute cluster after Finn finished running.
- #' @param target_log_transformation Log transform target variable before training models.
- #' @param negative_fcst Allow forecasts to dip below zero.
+ #' @param azure_batch_cluster_delete If TRUE, deletes the Azure Batch compute cluster after Finn finishes running.
+ #' @param target_log_transformation If TRUE, log transform the target variable before training models.
+ #' @param negative_fcst If TRUE, allow forecasts to dip below zero.
#' @param fourier_periods List of values to use in creating fourier series as features. Default of NULL automatically chooses
#' these values based on the date_type.
#' @param lag_periods List of values to use in creating lag features. Default of NULL automatically chooses these values
@@ -55,24 +55,25 @@
#' @param recipes_to_run List of recipes to run on multivariate models that can run different recipes. A value of NULL runs
#' all recipes, but only runs the R1 recipe for weekly and daily date types. A value of "all" runs all recipes, regardless
#' of date type. A list like c("R1") or c("R2") would only run models with the R1 or R2 recipe.
- #' @param pca Run principle component analysis on any lagged features to speed up model run time. Default of NULL runs
+ #' @param pca If TRUE, run principal component analysis on any lagged features to speed up model run time. Default of NULL runs
#' PCA on day and week date types across all local multivariate models, and also for global models across all date types.
#' @param reticulate_environment File path to python environment to use when training gluonts deep learning models.
#' Only important when parallel_processing is not set to 'azure_batch'. Azure Batch should use its own docker image
#' that has python environment already installed.
#' @param models_to_run List of models to run. Default of NULL runs all models.
#' @param models_not_to_run List of models not to run, overrides values in models_to_run. Default of NULL doesn't turn off
#' any model.
- #' @param run_deep_learning Run deep learning models from gluonts (deepar and nbeats). Overrides models_to_run and
+ #' @param run_deep_learning If TRUE, run deep learning models from gluonts (deepar and nbeats). Overrides models_to_run and
#' models_not_to_run.
- #' @param run_global_models Run multivariate models on the entire data set (across all time series) as a global model.
+ #' @param run_global_models If TRUE, run multivariate models on the entire data set (across all time series) as a global model.
#' Can be overridden by models_not_to_run. Default of NULL runs global models for all date types except week and day.
- #' @param run_local_models Run models by individual time series as local models.
- #' @param run_ensemble_models Run ensemble models
- #' @param average_models Create simple averages of individual models.
+ #' @param run_local_models If TRUE, run models by individual time series as local models.
+ #' @param run_ensemble_models If TRUE, run ensemble models. Default of NULL runs ensemble models only for quarter and month
+ #' date types.
+ #' @param average_models If TRUE, create simple averages of individual models.
#' @param max_model_average Max number of models to average together. Will create model averages for 2 models up until input value
#' or max number of models ran.
- #' @param weekly_to_daily Convert a week forecast down to day by evenly splitting across each day of week. Helps when aggregating
+ #' @param weekly_to_daily If TRUE, convert a weekly forecast down to daily by evenly splitting across each day of the week. Helps when aggregating
#' up to higher temporal levels like month or quarter.
#'
#' @return A list of three separate data sets: the future forecast, the back test results, and the best model per time series.
@@ -125,7 +126,7 @@ forecast_time_series <- function(input_data,
run_deep_learning = FALSE,
run_global_models = NULL,
run_local_models = TRUE,
- run_ensemble_models = TRUE,
+ run_ensemble_models = NULL,
average_models = TRUE,
max_model_average = 3,
weekly_to_daily = TRUE
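A hypothetical call showing the updated defaults in action — the input tibble and column names are illustrative, and any parameter not visible in this diff is an assumption from the package's documented API, not something this commit confirms:

library(finnts)

finn_output <- forecast_time_series(
  input_data          = sales_tbl,               # hypothetical historical-data tibble
  combo_variables     = c("Segment", "Country"), # assumed parameter name
  target_variable     = "Target",                # assumed parameter name
  date_type           = "month",
  forecast_horizon    = 12,                      # assumed parameter name
  run_ensemble_models = NULL,  # new default: auto-enabled for month/quarter only
  weekly_to_daily     = TRUE
)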
@@ -195,12 +196,20 @@
back_test_spacing <- get_back_test_spacing(back_test_spacing,
date_type)

- # * Yearly Forecast Adjustment ----
- if(date_type =="year") {
+ # * Ensemble Models Adjustment ----
+ if(is.null(run_ensemble_models) & date_type %in% c("quarter", "month")) {
+   run_ensemble_models <- TRUE
+ } else if(is.null(run_ensemble_models) & date_type %in% c("week", "day")) {
+   run_ensemble_models <- FALSE
+ } else if(sum(run_ensemble_models == TRUE) == 1 & date_type %in% c("quarter", "month", "week", "day")) {
+   run_ensemble_models <- TRUE
+ } else if(sum(run_ensemble_models == TRUE) == 1 & date_type =="year") {
    run_ensemble_models = FALSE
    warning("ensemble models have been turned off for yearly forecasts")
+ } else {
+   run_ensemble_models = FALSE
  }

# 4. Prep Data ----

cli::cli_h1("Prepping Data")
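Pulled out as a standalone function, the new default-resolution logic above behaves like this sketch (illustration only, not package code):

resolve_run_ensemble_models <- function(run_ensemble_models, date_type) {
  if (is.null(run_ensemble_models)) {
    # New default: ensembles on only for the longer date types
    return(date_type %in% c("quarter", "month"))
  }
  if (isTRUE(run_ensemble_models) && date_type == "year") {
    warning("ensemble models have been turned off for yearly forecasts")
    return(FALSE)
  }
  isTRUE(run_ensemble_models)
}

resolve_run_ensemble_models(NULL, "month") # TRUE
resolve_run_ensemble_models(NULL, "day")   # FALSE
resolve_run_ensemble_models(TRUE, "year")  # FALSE, with a warning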
@@ -354,17 +363,16 @@
model_combinations$All <- model_combinations %>% tidyr::unite(All, colnames(model_combinations))
model_combinations <- model_combinations$All


#parallel processing
if(run_model_parallel==TRUE & sum(parallel_processing == "local_machine") == 0) {

cores <- get_cores(num_cores)
cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)

#point to the correct libraries within Azure Batch
if(sum(parallel_processing=="azure_batch") == 1) {
- clusterEvalQ(cl, .libPaths("/mnt/batch/tasks/shared/R/packages"))
+ parallel::clusterEvalQ(cl, .libPaths("/mnt/batch/tasks/shared/R/packages"))
}

combinations_tbl <- foreach::foreach(i = model_combinations[[1]], .combine = 'rbind',
@@ -422,7 +430,7 @@ forecast_time_series <- function(input_data,
cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)

- combinations_tbl_final <- foreach(i = 2:min(max_model_average, length(model_list)), .combine = 'rbind',
+ combinations_tbl_final <- foreach::foreach(i = 2:min(max_model_average, length(model_list)), .combine = 'rbind',
.packages = get_export_packages(),
.export = c("fcst_prep", "get_cores")) %dopar% {create_model_averages(i)}

@@ -434,7 +442,7 @@
if(sum(parallel_processing=="azure_batch") == 1) {


- combinations_tbl_final <- foreach(i = 2:min(max_model_average, length(model_list)), .combine = 'rbind',
+ combinations_tbl_final <- foreach::foreach(i = 2:min(max_model_average, length(model_list)), .combine = 'rbind',
.packages = get_export_packages(),
.export = c("fcst_prep", "get_cores"),
.options.azure = list(maxTaskRetryCount = 0, autoDeleteJob = TRUE,
@@ -445,7 +453,7 @@
}

if(sum(parallel_processing == 'azure_batch') == 1 & azure_batch_cluster_delete == TRUE) {
- stopCluster(cluster)
+ parallel::stopCluster(cluster)
}

# combine with individual model data
@@ -653,7 +661,7 @@ forecast_time_series <- function(input_data,
dplyr::group_by(Combo, .id, Model) %>%
dplyr::mutate(Horizon = dplyr::row_number()) %>%
dplyr::ungroup() %>%
- dplyr::select(Combo, .id, Date, Model, Horizon, FCST, Target, MAPE) %>%
+ dplyr::mutate(Best_Model = ifelse(Model == "Best-Model", "Yes", "No")) %>%
+ dplyr::select(Combo, .id, Date, Model, Horizon, FCST, Target, MAPE, Best_Model) %>%
tidyr::separate(Combo, into = combo_variables, sep = '--', remove = FALSE) %>%
dplyr::rename(Back_Test_Scenario = .id)
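A hypothetical downstream use of the new Best_Model flag; the back_test_data element name is an assumption, not something this diff confirms:

library(magrittr)

finn_output$back_test_data %>%  # assumed element name
  dplyr::filter(Best_Model == "Yes") %>%
  dplyr::group_by(Combo) %>%
  dplyr::summarise(Avg_MAPE = mean(MAPE, na.rm = TRUE), .groups = "drop")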

4 changes: 2 additions & 2 deletions R/general_parallel.R
@@ -28,7 +28,7 @@ init_parallel_within <-function(type, num_cores){
doParallel::registerDoParallel(cl)

#point to the correct libraries within Azure Batch
- if(type == "azure_batch") {
+ if(sum(type == "azure_batch") == 1) {
parallel::clusterEvalQ(cl, .libPaths("/mnt/batch/tasks/shared/R/packages"))
}

@@ -72,7 +72,7 @@ get_fcast_parallel<- function(combo_list,

cli::cli_alert_info("Running across {cores} cores")

- fcst <- foreach(i = combo_list,
+ fcst <- foreach::foreach(i = combo_list,
.combine = 'rbind',
.packages = get_export_packages(),
.export = get_transfer_functions()
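The pattern being namespace-qualified in these hunks, reduced to a self-contained sketch with illustrative values (not finnts code):

library(foreach) # attaches %dopar%; finnts gets it via @importFrom instead

cl <- parallel::makeCluster(2)
doParallel::registerDoParallel(cl)

# Each iteration returns a one-row data frame; 'rbind' stacks the results
fcst <- foreach::foreach(i = 1:4, .combine = "rbind") %dopar% {
  data.frame(combo = i, fcst = sqrt(i))
}

parallel::stopCluster(cl)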
21 changes: 19 additions & 2 deletions R/utility.R
@@ -1,13 +1,30 @@
+ # define global variables to prevent notes in R CMD Check
+ utils::globalVariables(c(".id", ".key", ".model_desc", ".pred", ".resample_id", "All", "Best_Model", "Combo",
+                          "Combo_Test_Date", "Combo_Total", "Count", "Date", "Date_Adj", "Date_Adj_half",
+                          "Date_Adj_index.num", "Date_Adj_quarter", "Date_Adj_year", "Date_Day", "FCST",
+                          "Horizon", "MAPE", "Model", "Number", "Number_Char", "Origin", "Residual",
+                          "Residual_Std_Dev", "Rolling_MAPE", "Slice", "Sum", "Target", "Type", "Variable",
+                          "cluster", "frequency", "gluon_ts_frequency", "hi.80", "hi.95", "i", "lo.80", "lo.95",
+                          "weighted_MAPE", "where", "as2"))

#' @importFrom magrittr %>%
- #' @export
- magrittr::`%>%`
+ NULL

#' @importFrom methods formalArgs
NULL

#' @importFrom stats sd
NULL

+ #' @importFrom foreach %do% %dopar%
+ NULL

+ #' @importFrom lubridate %m+%
+ NULL

+ #' @import modeltime
+ NULL

# * cbind.fill custom function ----
#create function to cbind dataframes that contain different amounts of rows
#https://github.com/cvarrichio/rowr/blob/master/R/rowr.R
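The rowr-style helper referenced above pads shorter data frames with NA rows so they can be column-bound; a minimal sketch of the idea (not the exact rowr implementation):

cbind_fill <- function(...) {
  dfs <- list(...)
  n <- max(vapply(dfs, nrow, integer(1)))
  padded <- lapply(dfs, function(df) {
    if (nrow(df) < n) {
      # Indexing with NA yields all-NA rows that keep the original columns
      rbind(df, df[rep(NA_integer_, n - nrow(df)), , drop = FALSE])
    } else {
      df
    }
  })
  out <- do.call(cbind, padded)
  rownames(out) <- NULL
  out
}

cbind_fill(data.frame(a = 1:3), data.frame(b = 1:2))
#>   a  b
#> 1 1  1
#> 2 2  2
#> 3 3 NA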
4 changes: 4 additions & 0 deletions _pkgdown.yml
@@ -8,3 +8,7 @@ authors:
href: https://aadharshkannan.com/
Mike Tokic:
href: https://www.linkedin.com/in/michaeltokic/

+ template:
+   params:
+     ganalytics: G-6X0DS5856B
19 changes: 19 additions & 0 deletions cran-comments.md
@@ -0,0 +1,19 @@
## R CMD check results
There were no ERRORs or WARNINGs.

There was 1 NOTE:

* checking dependencies in R code ... NOTE
Imports includes 38 non-default packages.
Importing from so many packages makes the package vulnerable to any of
them becoming unavailable. Move as many as possible to Suggests and
use conditionally.

This package does leverage many outside packages. Its main feature is consolidating many different
models into a single package and running them automatically, so a large set of required packages
is inherent to the design.
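For reference, the conditional-use pattern the NOTE alludes to looks like this sketch ('lightgbm' is just an illustrative package name); finnts keeps such packages in Imports because every model is meant to run out of the box:

if (requireNamespace("lightgbm", quietly = TRUE)) {
  # train the lightgbm-based model
} else {
  message("Package 'lightgbm' not installed; skipping that model.")
}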

Also, this is my first CRAN submission.

## Downstream dependencies
There are currently no downstream dependencies for this package.