From 3415f4912172701b3f0602d1690edf3f0c59ad02 Mon Sep 17 00:00:00 2001 From: vladdsm Date: Mon, 14 Jun 2021 22:13:43 +0200 Subject: [PATCH 1/2] rule assign option for market type --- DESCRIPTION | 2 +- NEWS.md | 1 + R/mt_stat_transf.R | 110 ++++++++++++++++++++++++++++++++++-------- R/zzz.R | 2 +- man/mt_stat_transf.Rd | 33 +++++++++---- 5 files changed, 116 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 30333fd..424aa7c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: lazytrade Type: Package Title: Learn Computer and Data Science using Algorithmic Trading -Version: 0.5.2.9050 +Version: 0.5.2.9060 Author: Vladimir Zhbanko Maintainer: Vladimir Zhbanko Description: Provide sets of functions and methods to learn and practice data science using idea of algorithmic trading. diff --git a/NEWS.md b/NEWS.md index ba38298..cdb7fc3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,7 @@ * fail safe in `aml_collect_data` function will delete already recorded rds file if it has different amount of columns * add new function `util_find_pid` to find the PIDs of the terminal.exe application +* function mt_stat_transf is now using a rule to assing market type classes # lazytrade 0.5.1 diff --git a/R/mt_stat_transf.R b/R/mt_stat_transf.R index cbeaf8a..2cac446 100644 --- a/R/mt_stat_transf.R +++ b/R/mt_stat_transf.R @@ -25,6 +25,7 @@ #' 'kmeans' or 'hclust'. Default value is 'kmeans' #' @param clust_opt Character, option to select how to perform h clustering #' "average", "single", "complete", "ward". 
Default value is 'complete' +#' @param rule_opt Boolean, option to perform rule-based Market Type Assignment, defaults to TRUE #' #' @return Dataframe with statistically transformed and classified dataset for classification modeling #' @export @@ -44,17 +45,29 @@ #' #' #option #' #mt_classes = c('BUN', 'BEN', 'RAN','BUV', 'BEV', 'RAV') +#' #mt_classes = c('BUN', 'BEN', 'RAN') #' #clust_method = 'hclust' #' #clust_opt = 'ward' +#' +#' #build dataset for Market Type detection without rule based check +#' ai_class_rand <- mt_stat_transf(indicator_dataset = price_dataset_big, +#' num_bars = 64, +#' timeframe = 60, +#' path_data = path_data, +#' mt_classes = c('BUN', 'BEN', 'RAN'), +#' clust_method = 'kmeans', +#' clust_opt = 'complete', +#' rule_opt = FALSE) #' -#' mt_stat_transf(indicator_dataset = price_dataset_big, -#' num_bars = 64, -#' timeframe = 60, -#' path_data = path_data, -#' mt_classes = c('BUN', 'BEN', 'RAN'), -#' clust_method = 'kmeans', -#' clust_opt = 'complete') -#' +#' #use rule base check +#' ai_class_rule <- mt_stat_transf(indicator_dataset = price_dataset_big, +#' num_bars = 64, +#' timeframe = 60, +#' path_data = path_data, +#' mt_classes = c('BUN', 'BEN', 'RAN'), +#' clust_method = 'kmeans', +#' clust_opt = 'complete', +#' rule_opt = TRUE) #' #' mt_stat_transf <- function(indicator_dataset, @@ -63,7 +76,8 @@ mt_stat_transf <- function(indicator_dataset, path_data, mt_classes, clust_method = 'kmeans', - clust_opt = 'complete'){ + clust_opt = 'complete', + rule_opt = TRUE){ requireNamespace("dplyr", quietly = TRUE) requireNamespace("readr", quietly = TRUE) @@ -107,7 +121,11 @@ mt_stat_transf <- function(indicator_dataset, #Q3 vector with third quantile q3 <- apply(lg12, 2,stats::quantile, 0.75) - + + #n1 vector with first element of selected row + n1 <- apply(dfr12, 2,head,1) + #n2 vector with last element of selected row + n2 <- apply(dfr12, 2, tail,1) # vector with kurtosis #k1 <- apply(lg12, 2,moments::kurtosis) @@ -117,7 +135,9 @@ 
mt_stat_transf <- function(indicator_dataset, ## combine these vectors dfC <- data.frame(Q1 = q1, Q2 = q2, - Q3 = q3) + Q3 = q3, + N1 = n1, + N2 = n2) #K1 = k1, #S1 = s1 #) @@ -139,7 +159,10 @@ mt_stat_transf <- function(indicator_dataset, #Q3 vector with third quantile q3 <- apply(lg12, 2,stats::quantile, 0.75) - + #n1 vector with first element of selected row + n1 <- apply(dfr12, 2,head,1) + #n2 vector with last element of selected row + n2 <- apply(dfr12, 2, tail,1) # vector with kurtosis #k1 <- apply(lg12, 2,moments::kurtosis) @@ -147,9 +170,11 @@ mt_stat_transf <- function(indicator_dataset, # s1 <- apply(lg12, 2, moments::skewness) ## combine these vectors - dfC1 <- data.frame(Q1 = q1, - Q2 = q2, - Q3 = q3) + dfC1 <- data.frame(Q1 = q1, + Q2 = q2, + Q3 = q3, + N1 = n1, + N2 = n2) #K1 = k1, #S1 = s1 #) @@ -160,7 +185,11 @@ mt_stat_transf <- function(indicator_dataset, } } # end of the for loop - + + # Create copies of the dataset for verification purposes + dfCa <- dfC + dfC <- dfC[, 1:3] + ## performing clustering algorithm to classify this dataset into N classes # scale data dfCsc <- scale(dfC) @@ -187,14 +216,53 @@ mt_stat_transf <- function(indicator_dataset, } - ## TDL -> properly assign classes to labels - - - + ## rule based assignment of classes to labels + if(rule_opt && N==3){ + ## derive which class is which!!! 
+ # join columns with price values to the dataset + dfCa$M_T <- dfC$M_T + + # calculate summary statistics for each class + dfC2 <- dfCa %>% + dplyr::mutate(dN = 1000*(N2-N1)) %>% + dplyr::group_by(M_T) %>% + dplyr::summarise(Q1mean = mean(Q1), + Q2mean = mean(Q2), + Q3mean = mean(Q3), + dNmean = mean(dN), + Nobs = n()) + # result of dN is probably biased + + # max dN && dN > 0 -> BUN + dfBUN <- dfC2 %>% + dplyr::slice(which.max(dNmean)) %>% + dplyr::select(M_T) %>% + dplyr::mutate(MT_A = "BUN") + + # min dN && dN < 0 -> BEN + dfBEN <- dfC2 %>% + dplyr::slice(which.min(dNmean)) %>% + dplyr::select(M_T) %>% + dplyr::mutate(MT_A = "BEN") + # keep them together + dfBUNBEN <- dplyr::bind_rows(dfBUN, dfBEN) + df_all <- dfBUNBEN %>% + dplyr::full_join(dfC2) %>% + dplyr::arrange(M_T) + + # remaining is RAN... + df_all$MT_A[is.na(df_all$MT_A)] <- "RAN" + + # relabel the column M_T + dfC$M_T <- factor(dfC$M_T, labels=df_all$MT_A) + # relabel for manual checking + dfCa$M_T <- factor(dfCa$M_T, labels = df_all$MT_A) + + }else{ #rename clusters to be like desired (but classes will be unsupervised) dfC$M_T <- factor(dfC$M_T, labels=mt_classes) #plot(dfC, col = dfC$M_T) - + } full_path <- file.path(path_data, paste0('auto_M_T', timeframe, '.rds')) readr::write_rds(dfC, full_path) diff --git a/R/zzz.R b/R/zzz.R index ae44248..f623148 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -22,7 +22,7 @@ "rewardseq.OFF", "rewardseq.ON", "totreward", "trstate", "MarketType","predict_CMSUM", "LABEL_CMSUM", "CUMSUM_PNL", "DFR", "PairGain", "Symbol", "aes", "asset_name", - + "M_T", "N1", "N2", "Q1", "Q2", "Q3", "dN", "dNmean", "Hold_NB", "MaxPerf", "NB_hold", "PnL_NB", "TR_Level", "X2_NB", "qrtl", "value","FrstQntlPerf", "col_number", "t_running", diff --git a/man/mt_stat_transf.Rd b/man/mt_stat_transf.Rd index c937fe5..9968676 100644 --- a/man/mt_stat_transf.Rd +++ b/man/mt_stat_transf.Rd @@ -11,7 +11,8 @@ mt_stat_transf( path_data, mt_classes, clust_method = "kmeans", - clust_opt = "complete" + 
clust_opt = "complete", + rule_opt = TRUE ) } \arguments{ @@ -31,6 +32,8 @@ Each row is a time index, multiple columns are required but not strictly needed} \item{clust_opt}{Character, option to select how to perform h clustering "average", "single", "complete", "ward". Default value is 'complete'} + +\item{rule_opt}{Boolean, option to perform rule-based Market Type Assignment, defaults to TRUE} } \value{ Dataframe with statistically transformed and classified dataset for classification modeling @@ -64,17 +67,29 @@ data(price_dataset_big) #option #mt_classes = c('BUN', 'BEN', 'RAN','BUV', 'BEV', 'RAV') +#mt_classes = c('BUN', 'BEN', 'RAN') #clust_method = 'hclust' #clust_opt = 'ward' -mt_stat_transf(indicator_dataset = price_dataset_big, - num_bars = 64, - timeframe = 60, - path_data = path_data, - mt_classes = c('BUN', 'BEN', 'RAN'), - clust_method = 'kmeans', - clust_opt = 'complete') - +#build dataset for Market Type detection without rule based check +ai_class_rand <- mt_stat_transf(indicator_dataset = price_dataset_big, + num_bars = 64, + timeframe = 60, + path_data = path_data, + mt_classes = c('BUN', 'BEN', 'RAN'), + clust_method = 'kmeans', + clust_opt = 'complete', + rule_opt = FALSE) + +#use rule base check +ai_class_rule <- mt_stat_transf(indicator_dataset = price_dataset_big, + num_bars = 64, + timeframe = 60, + path_data = path_data, + mt_classes = c('BUN', 'BEN', 'RAN'), + clust_method = 'kmeans', + clust_opt = 'complete', + rule_opt = TRUE) } From 21e09151a71c9e287a99d9a687d2a48cfe4cee25 Mon Sep 17 00:00:00 2001 From: vladdsm Date: Sun, 20 Jun 2021 19:44:59 +0200 Subject: [PATCH 2/2] update with new features --- DESCRIPTION | 2 +- NEWS.md | 11 ++- R/mt_make_model.R | 171 ++++++++++++++++++++++++++-------------- R/mt_stat_evaluate.R | 1 + R/mt_stat_transf.R | 1 - cran-comments.md | 15 ++-- man/mt_make_model.Rd | 79 +++++++++++++------ man/mt_stat_evaluate.Rd | 1 + man/mt_stat_transf.Rd | 1 - 9 files changed, 186 insertions(+), 96 deletions(-) diff 
--git a/DESCRIPTION b/DESCRIPTION index 424aa7c..251cb26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: lazytrade Type: Package Title: Learn Computer and Data Science using Algorithmic Trading -Version: 0.5.2.9060 +Version: 0.5.2 Author: Vladimir Zhbanko Maintainer: Vladimir Zhbanko Description: Provide sets of functions and methods to learn and practice data science using idea of algorithmic trading. diff --git a/NEWS.md b/NEWS.md index cdb7fc3..b868bb7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## Planned Changes * setup github actions +* add fail safe for function input parameters # lazytrade 0.5.2 @@ -10,15 +11,13 @@ ## Changes -* add second parameter to simulation function +* add second parameter to simulation function `aml_simulation` * option to use full columns for model training when selecting 0 as a parameter `num_cols_used` * add suppress messages option during `readr::read_csv()` function calls -* option to use full columns for model training when selecting 0 as a parameter num_cols_used -* fail safe in `aml_collect_data` function will delete already recorded rds file if it has different -amount of columns +* fail safe in `aml_collect_data` function will delete already recorded rds file if it has different amount of columns * add new function `util_find_pid` to find the PIDs of the terminal.exe application -* function mt_stat_transf is now using a rule to assing market type classes - +* function `mt_stat_transf` is now using a rule to assign 3 market type classes +* rewrite function `mt_make_model` with the same philosophy as in `aml_make_model` # lazytrade 0.5.1 diff --git a/R/mt_make_model.R b/R/mt_make_model.R index 280fb73..b2a56a4 100644 --- a/R/mt_make_model.R +++ b/R/mt_make_model.R @@ -1,11 +1,13 @@ #' Function to train Deep Learning Classification model for Market Type recognition #' -#' @description Function is training h2o deep learning model to match manually classified patterns of the financial -#' indicator. 
Main idea is to be able to detect Market Type by solely relying on the current indicator pattern. -#' This is in the attempt to evaluate current market type and to use proper trading strategy. -#' Function will always try to gather mode data to update the model. -#' -#' Selected Market Periods according to the theory from Van K. Tharp: +#' @description Function is training h2o deep learning model to match +#' classified patterns of the financial indicator. +#' Main idea is to be able to detect Market Type by solely relying on the +#' current indicator pattern. +#' This is in the attempt to evaluate current market type for trading purposes. +#' +#' Selected Market Periods could be manually classified +#' according to the theory from Van K. Tharp: #' 1. Bull normal, BUN #' 2. Bull volatile, BUV #' 3. Bear normal, BEN @@ -13,26 +15,41 @@ #' 5. Sideways quiet, RAN #' 6. Sideways volatile, RAV #' -#' `r lifecycle::badge('stable')` +#' For automatic classification, could only be used: BUN, BEN, RAN market types +#' +#' `r lifecycle::badge('experimental')` #' -#' @details Function is using manually prepared dataset and tries several different random neural network structures. -#' Once the best neural network is found then the better model is trained and stored. +#' @details Function is using labeled dataset and tries several different random +#' neural network structures. Once the best neural network is found then the +#' better model is selected and stored. Dataset can be either manually labelled +#' or generated using function mt_stat_transf.R. In the latter case parameter +#' is_cluster shall be set to TRUE. 
#' #' @author (C) 2020, 2021 Vladimir Zhbanko #' @backref Market Type research of Van Tharp Institute: #' -#' @param indicator_dataset Dataframe, Dataset containing indicator patterns to train the model +#' @param indicator_dataset Data frame, Data set containing indicator patterns to train the model #' @param num_bars Integer, Number of bars used to detect pattern -#' @param timeframe Integer, Data timeframe in Minutes. +#' @param timeframe Integer, Data time frame in minutes. #' @param path_model String, Path where the models are be stored -#' @param path_data String, Path where the aggregated historical data is stored, if exists in rds format -#' @param activate_balance Boolean, option to choose if to balance market type classes or not, default TRUE -#' @param num_nn_options Integer, value from 3 to 24 or more. Used to change number of variants -#' of the random neural network structures. Value 3 will mean that only one -#' random structure will be used. To avoid warnings make sure to set this value +#' @param path_data String, Path where the aggregated historical data is stored, +#' if exists, in rds format +#' @param activate_balance Boolean, option to choose to balance market type classes or not, +#' default TRUE +#' @param num_nn_options Integer, value from 0 to 24 or more as multiple of 3. +#' Used to change number of variants for 3 hidden layer structure. +#' Random neural network structures will be generated. +#' When value 0 is set then a fixed structure will be used as +#' defined by parameter fixed_nn_struct. +#' To avoid warnings make sure to set this value as #' multiple of 3. Higher values will increase computation time. +#' @param fixed_nn_struct Integer vector with numeric elements, see par hidden in ?h2o.deeplearning, +#' default value is c(100,100). 
+#' Note this will only work if num_nn_options is 0 +#' @param num_epoch Integer, see parameter epochs in ?h2o.deeplearning, default value is 100 +#' Higher number may lead to long code execution #' @param is_cluster Boolean, set TRUE to use automatically clustered data -#' +#' #' @return Function is writing file object with the model #' @export #' @@ -58,17 +75,18 @@ #' h2o.init(nthreads = 2) #' #' -#' # performing Deep Learning Classification using the custom function manually prepared data +#' # performing Deep Learning Classification using manually labelled data #' mt_make_model(indicator_dataset = macd_ML60M, #' num_bars = 64, #' timeframe = 60, #' path_model = path_model, #' path_data = path_data, #' activate_balance = TRUE, -#' num_nn_options = 3) +#' num_nn_options = 3, +#' num_epoch = 10) #' #' data(price_dataset_big) -#' data <- head(price_dataset_big, 500) #reduce computational time +#' data <- head(price_dataset_big, 5000) #reduce computational time #' #' ai_class <- mt_stat_transf(indicator_dataset = data, #' num_bars = 64, @@ -83,10 +101,23 @@ #' path_model = path_model, #' path_data = path_data, #' activate_balance = TRUE, -#' num_nn_options = 3, +#' num_nn_options = 6, +#' num_epoch = 10, #' is_cluster = TRUE) #' -#' +#' # performing Deep Learning Classification using the custom function auto clustered data +#' # and fixed nn structure +#' mt_make_model(indicator_dataset = ai_class, +#' num_bars = 64, +#' timeframe = 60, +#' path_model = path_model, +#' path_data = path_data, +#' activate_balance = TRUE, +#' num_nn_options = 0, +#' fixed_nn_struct = c(10, 10), +#' num_epoch = 10, +#' is_cluster = TRUE) +#' #' # stop h2o engine #' h2o.shutdown(prompt = FALSE) #' @@ -98,22 +129,31 @@ #' #' mt_make_model <- function(indicator_dataset, - num_bars, + num_bars = 64, timeframe = 60, - path_model, path_data, + path_model, + path_data, activate_balance = TRUE, num_nn_options = 24, + fixed_nn_struct = c(100, 100), + num_epoch = 100, is_cluster = FALSE){ 
requireNamespace("dplyr", quietly = TRUE) requireNamespace("readr", quietly = TRUE) requireNamespace("h2o", quietly = TRUE) + # generate a file name for model + m_name <- paste0("DL_Classification", "_", timeframe, "M") + m_path <- file.path(path_model, m_name) + if(is_cluster == TRUE){ num_bars <- ncol(indicator_dataset)-1 } - macd_ML2 <- indicator_dataset %>% dplyr::mutate_at("M_T", as.factor) + macd_ML2 <- indicator_dataset %>% + #make sure column with label is a factor + dplyr::mutate(across("M_T", as.factor)) # check if we don't have too much data x1_nrows <- macd_ML2 %>% nrow() @@ -125,19 +165,36 @@ mt_make_model <- function(indicator_dataset, utils::head(40000) } + # split data into 2 groups + # split data to train and test blocks + # note: model will be trained on the OLDEST data + test_ind <- 1:round(0.3*x1_nrows) #test indices 1:xxx + dat21 <- macd_ML2[test_ind, ] #dataset to test the model using 30% of data + dat22 <- macd_ML2[-test_ind, ] #dataset to train the model + + # get this data into h2o: - macd_ML <- as.h2o(x = macd_ML2, destination_frame = "macd_ML") - + macd_ML <- h2o::as.h2o(x = dat22, destination_frame = "macd_ML") + recent_ML <- h2o::as.h2o(x = dat21, destination_frame = "recent_ML") + + # for loop to select the best neural network structure + ### fix or random network structure num_nn_options <- 24 + ### + n_layers <- length(fixed_nn_struct) + + if(num_nn_options == 0){ + nn_sets <- fixed_nn_struct %>% matrix(ncol = n_layers) + } else { + nn_sets <- sample.int(n = 100, num_nn_options) %>% matrix(ncol = 3) + } + # try different models and choose the best one... 
- ### random network structure - nn_sets <- sample.int(n = 100, num_nn_options) %>% matrix(ncol = 3) - for (i in 1:dim(nn_sets)[1]) { # i <- 1 - ModelC <- h2o.deeplearning( + ModelM <- h2o.deeplearning( model_id = paste0("DL_Classification", "_", timeframe, "M"), x = names(macd_ML[,1:num_bars]), y = "M_T", @@ -152,14 +209,18 @@ mt_make_model <- function(indicator_dataset, distribution = "AUTO", stopping_metric = "AUTO", balance_classes = activate_balance, - epochs = 200) - - #ModelC - #summary(ModelC) - #h2o.performance(ModelC) - RMSE <- h2o::h2o.performance(ModelC)@metrics$RMSE %>% as.data.frame() + epochs = num_epoch) + + #ModelM + #summary(ModelM) + #h2o.performance(ModelM) + + ### define best model using RMSE + RMSE <- h2o::h2o.performance(ModelM,newdata = recent_ML)@metrics$RMSE %>% + as.data.frame() + #RMSE <- h2o::h2o.performance(ModelM)@metrics$RMSE %>% as.data.frame() names(RMSE) <- 'RMSE' - + # record results of modelling if(!exists("df_res")){ df_res <- nn_sets[i,] %>% t() %>% as.data.frame() %>% dplyr::bind_cols(RMSE) @@ -168,32 +229,24 @@ mt_make_model <- function(indicator_dataset, df_res <- df_res %>% dplyr::bind_rows(df_row) } - - + #save intermediate models! 
+ # save model object + temp_model_path <- file.path(path_model, i) + if(!dir.exists(temp_model_path)){dir.create(temp_model_path)} + h2o::h2o.saveModel(ModelM, path = temp_model_path, force = T) } # end of for loop # find which row in the df_res has the smallest RMSE value slice(which.min(Employees)) lowest_RMSE <- df_res %>% dplyr::slice(which.min(RMSE)) %>% select(-RMSE) %>% unlist() %>% unname() - - ModelC <- h2o.deeplearning( - model_id = paste0("DL_Classification", "_", timeframe, "M"), - x = names(macd_ML[,1:num_bars]), - y = "M_T", - training_frame = macd_ML, - activation = "Tanh", - overwrite_with_best_model = TRUE, - autoencoder = FALSE, - hidden = lowest_RMSE, - loss = "Automatic", - sparse = TRUE, - l1 = 1e-4, - distribution = "AUTO", - stopping_metric = "AUTO", - balance_classes = activate_balance, - epochs = 200) - -h2o.saveModel(ModelC, path = path_model, force = TRUE) + best_row <- which.min(df_res$RMSE) + + ## retrieve and copy/paste the best model + best_model_location <- file.path(path_model, best_row, m_name) + best_model_destination <- file.path(path_model, m_name) + # copy best model object + file.copy(best_model_location, path_model, overwrite = TRUE) + #h2o.shutdown(prompt = FALSE) diff --git a/R/mt_stat_evaluate.R b/R/mt_stat_evaluate.R index 2cd9e1b..969abf8 100644 --- a/R/mt_stat_evaluate.R +++ b/R/mt_stat_evaluate.R @@ -55,6 +55,7 @@ #' path_data = path_data, #' activate_balance = TRUE, #' num_nn_options = 3, +#' num_epoch = 10, #' is_cluster = TRUE) #' #' diff --git a/R/mt_stat_transf.R b/R/mt_stat_transf.R index 2cac446..72f87d4 100644 --- a/R/mt_stat_transf.R +++ b/R/mt_stat_transf.R @@ -32,7 +32,6 @@ #' #' @examples #' -#' #' library(dplyr) #' library(stats) #' library(magrittr) diff --git a/cran-comments.md b/cran-comments.md index 93de14c..9f521b6 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,14 +1,17 @@ ## Test environments -* windows, R 4.0.4 (2021-02-15) -- "Lost Library Book" Platform: x86_64-w64-mingw32/x64 
(64-bit) -* R-hub builder -* R version R 4.0.4 (2021-02-15) -* R Under development (unstable) (2021-03-11 r80086) +* Docker Container, R 4.1.0 (2021-05-18) -- "Camp Pontanezen" +Platform: x86_64-pc-linux-gnu (64-bit) +* R-hub builder +Platform: Debian Linux, R-devel, GCC +* Winbuilder +* R version 4.1.0 (2021-05-18) +* R Under development (unstable) (2021-06-18 r80528) ## R CMD check results Status: OK -R version R 4.0.4 (2021-02-15) +R version R 4.1.0 (2021-05-18) There were no ERRORs or WARNINGs or NOTEs ## Downstream dependencies -R version 3.6.3 (2020-02-29) +R version 4.0.5 (2021-03-31) diff --git a/man/mt_make_model.Rd b/man/mt_make_model.Rd index fdeb3df..0030360 100644 --- a/man/mt_make_model.Rd +++ b/man/mt_make_model.Rd @@ -6,45 +6,61 @@ \usage{ mt_make_model( indicator_dataset, - num_bars, + num_bars = 64, timeframe = 60, path_model, path_data, activate_balance = TRUE, num_nn_options = 24, + fixed_nn_struct = c(100, 100), + num_epoch = 100, is_cluster = FALSE ) } \arguments{ -\item{indicator_dataset}{Dataframe, Dataset containing indicator patterns to train the model} +\item{indicator_dataset}{Data frame, Data set containing indicator patterns to train the model} \item{num_bars}{Integer, Number of bars used to detect pattern} -\item{timeframe}{Integer, Data timeframe in Minutes.} +\item{timeframe}{Integer, Data time frame in minutes.} \item{path_model}{String, Path where the models are be stored} -\item{path_data}{String, Path where the aggregated historical data is stored, if exists in rds format} +\item{path_data}{String, Path where the aggregated historical data is stored, +if exists, in rds format} -\item{activate_balance}{Boolean, option to choose if to balance market type classes or not, default TRUE} +\item{activate_balance}{Boolean, option to choose to balance market type classes or not, +default TRUE} -\item{num_nn_options}{Integer, value from 3 to 24 or more. Used to change number of variants -of the random neural network structures. 
Value 3 will mean that only one -random structure will be used. To avoid warnings make sure to set this value +\item{num_nn_options}{Integer, value from 0 to 24 or more as multiple of 3. +Used to change number of variants for 3 hidden layer structure. +Random neural network structures will be generated. +When value 0 is set then a fixed structure will be used as +defined by parameter fixed_nn_struct. +To avoid warnings make sure to set this value as multiple of 3. Higher values will increase computation time.} +\item{fixed_nn_struct}{Integer vector with numeric elements, see par hidden in ?h2o.deeplearning, +default value is c(100,100). +Note this will only work if num_nn_options is 0} + +\item{num_epoch}{Integer, see parameter epochs in ?h2o.deeplearning, default value is 100 +Higher number may lead to long code execution} + \item{is_cluster}{Boolean, set TRUE to use automatically clustered data} } \value{ Function is writing file object with the model } \description{ -Function is training h2o deep learning model to match manually classified patterns of the financial -indicator. Main idea is to be able to detect Market Type by solely relying on the current indicator pattern. -This is in the attempt to evaluate current market type and to use proper trading strategy. -Function will always try to gather mode data to update the model. - -Selected Market Periods according to the theory from Van K. Tharp: +Function is training h2o deep learning model to match +classified patterns of the financial indicator. +Main idea is to be able to detect Market Type by solely relying on the +current indicator pattern. +This is in the attempt to evaluate current market type for trading purposes. + +Selected Market Periods could be manually classified +according to the theory from Van K. Tharp: \enumerate{ \item Bull normal, BUN \item Bull volatile, BUV @@ -54,11 +70,16 @@ Selected Market Periods according to the theory from Van K. 
Tharp: \item Sideways volatile, RAV } -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +For automatic classification only the BUN, BEN and RAN market types can be used + +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \details{ -Function is using manually prepared dataset and tries several different random neural network structures. -Once the best neural network is found then the better model is trained and stored. +Function is using labeled dataset and tries several different random +neural network structures. Once the best neural network is found then the +better model is selected and stored. Dataset can be either manually labelled +or generated using function mt_stat_transf(). In the latter case parameter +is_cluster shall be set to TRUE. } \examples{ @@ -82,17 +103,18 @@ Sys.sleep(5) h2o.init(nthreads = 2) -# performing Deep Learning Classification using the custom function manually prepared data +# performing Deep Learning Classification using manually labelled data mt_make_model(indicator_dataset = macd_ML60M, num_bars = 64, timeframe = 60, path_model = path_model, path_data = path_data, activate_balance = TRUE, - num_nn_options = 3) + num_nn_options = 3, + num_epoch = 10) data(price_dataset_big) -data <- head(price_dataset_big, 500) #reduce computational time +data <- head(price_dataset_big, 5000) #reduce computational time ai_class <- mt_stat_transf(indicator_dataset = data, num_bars = 64, @@ -107,10 +129,23 @@ mt_make_model(indicator_dataset = ai_class, path_model = path_model, path_data = path_data, activate_balance = TRUE, - num_nn_options = 3, + num_nn_options = 6, + num_epoch = 10, is_cluster = TRUE) - +# performing Deep Learning Classification using the custom function auto clustered data +# and fixed nn structure 
+mt_make_model(indicator_dataset = ai_class, + num_bars = 64, + timeframe = 60, + path_model = path_model, + path_data = path_data, + activate_balance = TRUE, + num_nn_options = 0, + fixed_nn_struct = c(10, 10), + num_epoch = 10, + is_cluster = TRUE) + # stop h2o engine h2o.shutdown(prompt = FALSE) diff --git a/man/mt_stat_evaluate.Rd b/man/mt_stat_evaluate.Rd index 18906b3..1044e90 100644 --- a/man/mt_stat_evaluate.Rd +++ b/man/mt_stat_evaluate.Rd @@ -63,6 +63,7 @@ mt_make_model(indicator_dataset = ai_class, path_data = path_data, activate_balance = TRUE, num_nn_options = 3, + num_epoch = 10, is_cluster = TRUE) diff --git a/man/mt_stat_transf.Rd b/man/mt_stat_transf.Rd index 9968676..ee714ee 100644 --- a/man/mt_stat_transf.Rd +++ b/man/mt_stat_transf.Rd @@ -54,7 +54,6 @@ automated way of doing such data classification } \examples{ - library(dplyr) library(stats) library(magrittr)