From 3415f4912172701b3f0602d1690edf3f0c59ad02 Mon Sep 17 00:00:00 2001
From: vladdsm <vladimir.zhbanko@gmail.com>
Date: Mon, 14 Jun 2021 22:13:43 +0200
Subject: [PATCH 1/2] rule assign option for market type

---
 DESCRIPTION           |   2 +-
 NEWS.md               |   1 +
 R/mt_stat_transf.R    | 110 ++++++++++++++++++++++++++++++++++--------
 R/zzz.R               |   2 +-
 man/mt_stat_transf.Rd |  33 +++++++++----
 5 files changed, 116 insertions(+), 32 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 30333fd..424aa7c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: lazytrade
 Type: Package
 Title: Learn Computer and Data Science using Algorithmic Trading
-Version: 0.5.2.9050
+Version: 0.5.2.9060
 Author: Vladimir Zhbanko
 Maintainer: Vladimir Zhbanko <vladimir.zhbanko@gmail.com>
 Description: Provide sets of functions and methods to learn and practice data science using idea of algorithmic trading.
diff --git a/NEWS.md b/NEWS.md
index ba38298..cdb7fc3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -17,6 +17,7 @@
 * fail safe in `aml_collect_data` function will delete already recorded rds file if it has different
 amount of columns
 * add new function `util_find_pid` to find the PIDs of the terminal.exe application
+* function mt_stat_transf is now using a rule to assing market type classes
 
 
 # lazytrade 0.5.1
diff --git a/R/mt_stat_transf.R b/R/mt_stat_transf.R
index cbeaf8a..2cac446 100644
--- a/R/mt_stat_transf.R
+++ b/R/mt_stat_transf.R
@@ -25,6 +25,7 @@
 #'                            'kmeans' or 'hclust'. Default value is 'kmeans'
 #' @param clust_opt           Character, option to select how to perform h clustering
 #'                            "average", "single", "complete", "ward". Default value is 'complete'
+#' @param rule_opt            Boolean, option to perform rule-based Market Type Assignment, defaults to TRUE
 #'
 #' @return Dataframe with statistically transformed and classified dataset for classification modeling
 #' @export
@@ -44,17 +45,29 @@
 #'
 #' #option
 #' #mt_classes = c('BUN', 'BEN', 'RAN','BUV', 'BEV', 'RAV')
+#' #mt_classes = c('BUN', 'BEN', 'RAN')
 #' #clust_method = 'hclust'
 #' #clust_opt = 'ward'
+#' 
+#' #build dataset for Market Type detection without rule based check
+#' ai_class_rand <- mt_stat_transf(indicator_dataset = price_dataset_big,
+#'                                 num_bars = 64,
+#'                                 timeframe = 60,
+#'                                 path_data = path_data,
+#'                                 mt_classes = c('BUN', 'BEN', 'RAN'),
+#'                                 clust_method = 'kmeans',
+#'                                 clust_opt = 'complete',
+#'                                 rule_opt = FALSE)
 #'
-#' mt_stat_transf(indicator_dataset = price_dataset_big,
-#'                num_bars = 64,
-#'                timeframe = 60,
-#'                path_data = path_data,
-#'                mt_classes = c('BUN', 'BEN', 'RAN'),
-#'                clust_method = 'kmeans',
-#'                clust_opt = 'complete')
-#'
+#' #use rule base check
+#' ai_class_rule <- mt_stat_transf(indicator_dataset = price_dataset_big,
+#'                                 num_bars = 64,
+#'                                 timeframe = 60,
+#'                                 path_data = path_data,
+#'                                 mt_classes = c('BUN', 'BEN', 'RAN'),
+#'                                 clust_method = 'kmeans',
+#'                                 clust_opt = 'complete',
+#'                                 rule_opt = TRUE)
 #'
 #'
 mt_stat_transf <- function(indicator_dataset,
@@ -63,7 +76,8 @@ mt_stat_transf <- function(indicator_dataset,
                            path_data,
                            mt_classes,
                            clust_method = 'kmeans',
-                           clust_opt = 'complete'){
+                           clust_opt = 'complete',
+                           rule_opt = TRUE){
 
   requireNamespace("dplyr", quietly = TRUE)
   requireNamespace("readr", quietly = TRUE)
@@ -107,7 +121,11 @@ mt_stat_transf <- function(indicator_dataset,
 
       #Q3 vector with third quantile
       q3 <- apply(lg12, 2,stats::quantile, 0.75)
-
+      
+      #n1 vector with the first value of each column of dfr12
+      n1 <- apply(dfr12, 2, utils::head, 1)
+      #n2 vector with the last value of each column of dfr12
+      n2 <- apply(dfr12, 2, utils::tail, 1)
       # vector with kurtosis
       #k1 <- apply(lg12, 2,moments::kurtosis)
 
@@ -117,7 +135,9 @@ mt_stat_transf <- function(indicator_dataset,
       ## combine these vectors
       dfC <- data.frame(Q1 = q1,
                         Q2 = q2,
-                        Q3 = q3)
+                        Q3 = q3,
+                        N1 = n1,
+                        N2 = n2)
                         #K1 = k1,
                         #S1 = s1
                         #)
@@ -139,7 +159,10 @@ mt_stat_transf <- function(indicator_dataset,
 
       #Q3 vector with third quantile
       q3 <- apply(lg12, 2,stats::quantile, 0.75)
-
+      #n1 vector with the first value of each column of dfr12
+      n1 <- apply(dfr12, 2, utils::head, 1)
+      #n2 vector with the last value of each column of dfr12
+      n2 <- apply(dfr12, 2, utils::tail, 1)
       # vector with kurtosis
       #k1 <- apply(lg12, 2,moments::kurtosis)
 
@@ -147,9 +170,11 @@ mt_stat_transf <- function(indicator_dataset,
       # s1 <- apply(lg12, 2, moments::skewness)
 
       ## combine these vectors
-      dfC1  <- data.frame(Q1 = q1,
-                          Q2 = q2,
-                          Q3 = q3)
+      dfC1 <- data.frame(Q1 = q1,
+                        Q2 = q2,
+                        Q3 = q3,
+                        N1 = n1,
+                        N2 = n2)
       #K1 = k1,
       #S1 = s1
       #)
@@ -160,7 +185,11 @@ mt_stat_transf <- function(indicator_dataset,
     }
 
   } # end of the for loop
-
+  
+  # Create copies of the dataset for verification purposes
+  dfCa <- dfC
+  dfC <- dfC[, 1:3]
+  
   ## performing clustering algorithm to classify this dataset into N classes
   # scale data
   dfCsc <- scale(dfC)
@@ -187,14 +216,53 @@ mt_stat_transf <- function(indicator_dataset,
   }
 
 
-  ## TDL -> properly assign classes to labels
-
-
-
+  ## rule based assignment of classes to labels
+  if(rule_opt && N==3){  
+    ## derive which class is which!!!
+    # join columns with price values to the dataset
+    dfCa$M_T <- dfC$M_T
+
+    # calculate summary statistics for each class
+    dfC2 <- dfCa %>% 
+      dplyr::mutate(dN = 1000*(N2-N1)) %>% 
+      dplyr::group_by(M_T) %>% 
+      dplyr::summarise(Q1mean = mean(Q1),
+                       Q2mean = mean(Q2),
+                       Q3mean = mean(Q3),
+                       dNmean = mean(dN),
+                       Nobs = dplyr::n())
+      # result of dN is probably biased 
+
+    # max dN && dN > 0 -> BUN
+        dfBUN <- dfC2 %>% 
+      dplyr::slice(which.max(dNmean)) %>% 
+      dplyr::select(M_T) %>% 
+      dplyr::mutate(MT_A = "BUN")
+        
+        # min dN && dN < 0 -> BEN
+    dfBEN <- dfC2 %>% 
+      dplyr::slice(which.min(dNmean)) %>% 
+      dplyr::select(M_T) %>% 
+      dplyr::mutate(MT_A = "BEN")
+    # keep them together
+    dfBUNBEN <- dplyr::bind_rows(dfBUN, dfBEN)
+    df_all <- dfBUNBEN %>% 
+      dplyr::full_join(dfC2, by = "M_T") %>%
+      dplyr::arrange(M_T)
+    
+    # remaining is RAN...
+    df_all$MT_A[is.na(df_all$MT_A)] <- "RAN"
+    
+    # relabel the column M_T
+    dfC$M_T <- factor(dfC$M_T, labels=df_all$MT_A)
+    # relabel for manual checking
+    dfCa$M_T <- factor(dfCa$M_T, labels = df_all$MT_A)
+    
+    }else{
   #rename clusters to be like desired (but classes will be unsupervised)
   dfC$M_T <- factor(dfC$M_T, labels=mt_classes)
   #plot(dfC, col = dfC$M_T)
-
+  }
   full_path <- file.path(path_data, paste0('auto_M_T', timeframe, '.rds'))
 
   readr::write_rds(dfC, full_path)
diff --git a/R/zzz.R b/R/zzz.R
index ae44248..f623148 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -22,7 +22,7 @@
         "rewardseq.OFF", "rewardseq.ON",
         "totreward", "trstate", "MarketType","predict_CMSUM", "LABEL_CMSUM",
         "CUMSUM_PNL", "DFR", "PairGain", "Symbol", "aes", "asset_name",
-
+        "M_T", "N1", "N2", "Q1", "Q2", "Q3", "dN", "dNmean",
         "Hold_NB", "MaxPerf", "NB_hold", "PnL_NB", "TR_Level", "X2_NB",
         "qrtl", "value","FrstQntlPerf", "col_number", "t_running",
 
diff --git a/man/mt_stat_transf.Rd b/man/mt_stat_transf.Rd
index c937fe5..9968676 100644
--- a/man/mt_stat_transf.Rd
+++ b/man/mt_stat_transf.Rd
@@ -11,7 +11,8 @@ mt_stat_transf(
   path_data,
   mt_classes,
   clust_method = "kmeans",
-  clust_opt = "complete"
+  clust_opt = "complete",
+  rule_opt = TRUE
 )
 }
 \arguments{
@@ -31,6 +32,8 @@ Each row is a time index, multiple columns are required but not strictly needed}
 
 \item{clust_opt}{Character, option to select how to perform h clustering
 "average", "single", "complete", "ward". Default value is 'complete'}
+
+\item{rule_opt}{Boolean, option to perform rule-based Market Type Assignment, defaults to TRUE}
 }
 \value{
 Dataframe with statistically transformed and classified dataset for classification modeling
@@ -64,17 +67,29 @@ data(price_dataset_big)
 
 #option
 #mt_classes = c('BUN', 'BEN', 'RAN','BUV', 'BEV', 'RAV')
+#mt_classes = c('BUN', 'BEN', 'RAN')
 #clust_method = 'hclust'
 #clust_opt = 'ward'
 
-mt_stat_transf(indicator_dataset = price_dataset_big,
-               num_bars = 64,
-               timeframe = 60,
-               path_data = path_data,
-               mt_classes = c('BUN', 'BEN', 'RAN'),
-               clust_method = 'kmeans',
-               clust_opt = 'complete')
-
+#build dataset for Market Type detection without rule based check
+ai_class_rand <- mt_stat_transf(indicator_dataset = price_dataset_big,
+                                num_bars = 64,
+                                timeframe = 60,
+                                path_data = path_data,
+                                mt_classes = c('BUN', 'BEN', 'RAN'),
+                                clust_method = 'kmeans',
+                                clust_opt = 'complete',
+                                rule_opt = FALSE)
+
+#use rule base check
+ai_class_rule <- mt_stat_transf(indicator_dataset = price_dataset_big,
+                                num_bars = 64,
+                                timeframe = 60,
+                                path_data = path_data,
+                                mt_classes = c('BUN', 'BEN', 'RAN'),
+                                clust_method = 'kmeans',
+                                clust_opt = 'complete',
+                                rule_opt = TRUE)
 
 
 }

From 21e09151a71c9e287a99d9a687d2a48cfe4cee25 Mon Sep 17 00:00:00 2001
From: vladdsm <vladimir.zhbanko@gmail.com>
Date: Sun, 20 Jun 2021 19:44:59 +0200
Subject: [PATCH 2/2] update with new features

---
 DESCRIPTION             |   2 +-
 NEWS.md                 |  11 ++-
 R/mt_make_model.R       | 171 ++++++++++++++++++++++++++--------------
 R/mt_stat_evaluate.R    |   1 +
 R/mt_stat_transf.R      |   1 -
 cran-comments.md        |  15 ++--
 man/mt_make_model.Rd    |  79 +++++++++++++------
 man/mt_stat_evaluate.Rd |   1 +
 man/mt_stat_transf.Rd   |   1 -
 9 files changed, 186 insertions(+), 96 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 424aa7c..251cb26 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: lazytrade
 Type: Package
 Title: Learn Computer and Data Science using Algorithmic Trading
-Version: 0.5.2.9060
+Version: 0.5.2
 Author: Vladimir Zhbanko
 Maintainer: Vladimir Zhbanko <vladimir.zhbanko@gmail.com>
 Description: Provide sets of functions and methods to learn and practice data science using idea of algorithmic trading.
diff --git a/NEWS.md b/NEWS.md
index cdb7fc3..b868bb7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 ## Planned Changes
 
 * setup github actions
+* add fail safe for function input parameters
 
 # lazytrade 0.5.2
 
@@ -10,15 +11,13 @@
 
 ## Changes
 
-* add second parameter to simulation function
+* add second parameter to simulation function `aml_simulation`
 * option to use full columns for model training when selecting 0 as a parameter `num_cols_used`
 * add suppress messages option during `readr::read_csv()` function calls
-* option to use full columns for model training when selecting 0 as a parameter num_cols_used
-* fail safe in `aml_collect_data` function will delete already recorded rds file if it has different
-amount of columns
+* fail safe in `aml_collect_data` function will delete already recorded rds file if it has different amount of columns
 * add new function `util_find_pid` to find the PIDs of the terminal.exe application
-* function mt_stat_transf is now using a rule to assing market type classes
-
+* function `mt_stat_transf` is now using a rule to assign 3 market type classes
+* rewrite function `mt_make_model` with the same philosophy as in `aml_make_model`
 
 # lazytrade 0.5.1
 
diff --git a/R/mt_make_model.R b/R/mt_make_model.R
index 280fb73..b2a56a4 100644
--- a/R/mt_make_model.R
+++ b/R/mt_make_model.R
@@ -1,11 +1,13 @@
 #' Function to train Deep Learning Classification model for Market Type recognition
 #'
-#' @description  Function is training h2o deep learning model to match manually classified patterns of the financial
-#' indicator. Main idea is to be able to detect Market Type by solely relying on the current indicator pattern.
-#' This is in the attempt to evaluate current market type and to use proper trading strategy.
-#' Function will always try to gather mode data to update the model.
-#'
-#' Selected Market Periods according to the theory from Van K. Tharp:
+#' @description  Function is training h2o deep learning model to match
+#' classified patterns of the financial indicator. 
+#' Main idea is to be able to detect Market Type by solely relying on the 
+#' current indicator pattern.
+#' This is in the attempt to evaluate current market type for trading purposes.
+#'
+#' Selected Market Periods could be manually classified 
+#' according to the theory from Van K. Tharp:
 #' 1. Bull normal, BUN
 #' 2. Bull volatile, BUV
 #' 3. Bear normal, BEN
@@ -13,26 +15,41 @@
 #' 5. Sideways quiet, RAN
 #' 6. Sideways volatile, RAV
 #'
-#' `r lifecycle::badge('stable')`
+#' For automatic classification only the BUN, BEN and RAN market types can be used.
+#'
+#' `r lifecycle::badge('experimental')`
 #'
-#' @details Function is using manually prepared dataset and tries several different random neural network structures.
-#' Once the best neural network is found then the better model is trained and stored.
+#' @details Function is using labeled dataset and tries several different random 
+#' neural network structures. Once the best neural network is found then the 
+#' better model is selected and stored. Dataset can be either manually labelled
+#' or generated using function mt_stat_transf(). In the latter case parameter
+#' is_cluster shall be set to TRUE.
 #'
 #' @author (C) 2020, 2021 Vladimir Zhbanko
 #' @backref Market Type research of Van Tharp Institute: <https://www.vantharp.com/>
 #'
-#' @param indicator_dataset   Dataframe, Dataset containing indicator patterns to train the model
+#' @param indicator_dataset   Data frame, Data set containing indicator patterns to train the model
 #' @param num_bars            Integer, Number of bars used to detect pattern
-#' @param timeframe           Integer, Data timeframe in Minutes.
+#' @param timeframe           Integer, Data time frame in minutes.
 #' @param path_model          String, Path where the models are be stored
-#' @param path_data           String, Path where the aggregated historical data is stored, if exists in rds format
-#' @param activate_balance    Boolean, option to choose if to balance market type classes or not, default TRUE
-#' @param num_nn_options      Integer, value from 3 to 24 or more. Used to change number of variants
-#'                            of the random neural network structures. Value 3 will mean that only one
-#'                            random structure will be used. To avoid warnings make sure to set this value
+#' @param path_data           String, Path where the aggregated historical data is stored,
+#'                            if exists, in rds format
+#' @param activate_balance    Boolean, option to choose to balance market type classes or not,
+#'                            default TRUE
+#' @param num_nn_options      Integer, value from 0 to 24 or more as multiple of 3.
+#'                            Used to change number of variants for 3 hidden layer structure.
+#'                            Random neural network structures will be generated.
+#'                            When value 0 is set then a fixed structure will be used as 
+#'                            defined by parameter fixed_nn_struct. 
+#'                            To avoid warnings make sure to set this value as
 #'                            multiple of 3. Higher values will increase computation time.
+#' @param fixed_nn_struct     Integer vector with numeric elements, see par hidden in ?h2o.deeplearning,
+#'                            default value is c(100,100). 
+#'                            Note this will only work if num_nn_options is 0
+#' @param num_epoch           Integer, see parameter epochs in ?h2o.deeplearning, default value is 100
+#'                            Higher number may lead to long code execution
 #' @param is_cluster          Boolean, set TRUE to use automatically clustered data
-#'
+#' 
 #' @return Function is writing file object with the model
 #' @export
 #'
@@ -58,17 +75,18 @@
 #' h2o.init(nthreads = 2)
 #'
 #'
-#' # performing Deep Learning Classification using the custom function manually prepared data
+#' # performing Deep Learning Classification using manually labelled data
 #' mt_make_model(indicator_dataset = macd_ML60M,
 #'               num_bars = 64,
 #'               timeframe = 60,
 #'               path_model = path_model,
 #'               path_data = path_data,
 #'               activate_balance = TRUE,
-#'               num_nn_options = 3)
+#'               num_nn_options = 3,
+#'               num_epoch = 10)
 #'
 #' data(price_dataset_big)
-#' data <- head(price_dataset_big, 500) #reduce computational time
+#' data <- head(price_dataset_big, 5000) #reduce computational time
 #'
 #' ai_class <- mt_stat_transf(indicator_dataset = data,
 #'                       num_bars = 64,
@@ -83,10 +101,23 @@
 #'               path_model = path_model,
 #'               path_data = path_data,
 #'               activate_balance = TRUE,
-#'               num_nn_options = 3,
+#'               num_nn_options = 6,
+#'               num_epoch = 10,
 #'               is_cluster = TRUE)
 #'
-#'
+#' # performing Deep Learning Classification using the custom function auto clustered data
+#' # and fixed nn structure
+#' mt_make_model(indicator_dataset = ai_class,
+#'               num_bars = 64,
+#'               timeframe = 60,
+#'               path_model = path_model,
+#'               path_data = path_data,
+#'               activate_balance = TRUE,
+#'               num_nn_options = 0,
+#'               fixed_nn_struct = c(10, 10),
+#'               num_epoch = 10,
+#'               is_cluster = TRUE)
+#'               
 #' # stop h2o engine
 #' h2o.shutdown(prompt = FALSE)
 #'
@@ -98,22 +129,31 @@
 #'
 #'
 mt_make_model <- function(indicator_dataset,
-                          num_bars,
+                          num_bars = 64,
                           timeframe = 60,
-                          path_model, path_data,
+                          path_model, 
+                          path_data,
                           activate_balance = TRUE,
                           num_nn_options = 24,
+                          fixed_nn_struct = c(100, 100),
+                          num_epoch = 100,
                           is_cluster = FALSE){
 
   requireNamespace("dplyr", quietly = TRUE)
   requireNamespace("readr", quietly = TRUE)
   requireNamespace("h2o", quietly = TRUE)
 
+  # generate a file name for model
+  m_name <- paste0("DL_Classification", "_", timeframe, "M")
+  m_path <- file.path(path_model, m_name)
+  
   if(is_cluster == TRUE){
     num_bars <- ncol(indicator_dataset)-1
   }
 
-  macd_ML2 <- indicator_dataset %>% dplyr::mutate_at("M_T", as.factor)
+  macd_ML2 <- indicator_dataset %>% 
+    #make sure column with label is a factor
+    dplyr::mutate(dplyr::across("M_T", as.factor))
 
   # check if we don't have too much data
   x1_nrows <- macd_ML2 %>% nrow()
@@ -125,19 +165,36 @@ mt_make_model <- function(indicator_dataset,
       utils::head(40000)
   }
 
+  # split data into 2 groups
+  # split data to train and test blocks
+  # note: model will be trained on the OLDEST data
+  test_ind  <- seq_len(round(0.3*nrow(macd_ML2))) #test indices 1:xxx; x1_nrows predates the head(40000) cap
+  dat21 <- macd_ML2[test_ind, ]    #dataset to test the model using 30% of data
+  dat22 <- macd_ML2[-test_ind, ]   #dataset to train the model
+  
+  
   # get this data into h2o:
-  macd_ML  <- as.h2o(x = macd_ML2, destination_frame = "macd_ML")
-
+  macd_ML  <- h2o::as.h2o(x = dat22, destination_frame = "macd_ML")
+  recent_ML  <- h2o::as.h2o(x = dat21, destination_frame = "recent_ML")
+  
+  # for loop to select the best neural network structure
+  ### fix or random network structure num_nn_options <- 24
+  ###
+  n_layers <- length(fixed_nn_struct)
+  
+  if(num_nn_options == 0){
+    nn_sets <- fixed_nn_struct %>% matrix(ncol = n_layers)
+  } else {
+    nn_sets <- sample.int(n = 100, num_nn_options) %>% matrix(ncol = 3)
+  }
+  
   # try different models and choose the best one...
-  ### random network structure
-  nn_sets <- sample.int(n = 100, num_nn_options) %>% matrix(ncol = 3)
-
   for (i in 1:dim(nn_sets)[1]) {
 
     # i <- 1
 
 
-  ModelC <- h2o.deeplearning(
+  ModelM <- h2o.deeplearning(
     model_id = paste0("DL_Classification", "_", timeframe, "M"),
     x = names(macd_ML[,1:num_bars]),
     y = "M_T",
@@ -152,14 +209,18 @@ mt_make_model <- function(indicator_dataset,
     distribution = "AUTO",
     stopping_metric = "AUTO",
     balance_classes = activate_balance,
-    epochs = 200)
-
-  #ModelC
-  #summary(ModelC)
-  #h2o.performance(ModelC)
-  RMSE <- h2o::h2o.performance(ModelC)@metrics$RMSE %>% as.data.frame()
+    epochs = num_epoch)
+
+  #ModelM
+  #summary(ModelM)
+  #h2o.performance(ModelM)
+  
+  ### define best model using RMSE
+  RMSE <- h2o::h2o.performance(ModelM,newdata = recent_ML)@metrics$RMSE %>%
+    as.data.frame()
+  #RMSE <- h2o::h2o.performance(ModelM)@metrics$RMSE %>% as.data.frame()
   names(RMSE) <- 'RMSE'
-
+  
   # record results of modelling
   if(!exists("df_res")){
     df_res <- nn_sets[i,] %>% t() %>% as.data.frame() %>% dplyr::bind_cols(RMSE)
@@ -168,32 +229,24 @@ mt_make_model <- function(indicator_dataset,
     df_res <- df_res %>% dplyr::bind_rows(df_row)
   }
 
-
-
+  #save intermediate models!
+  # save model object
+  temp_model_path <- file.path(path_model, i)
+  if(!dir.exists(temp_model_path)){dir.create(temp_model_path)}
+  h2o::h2o.saveModel(ModelM, path = temp_model_path, force = TRUE)
 
   } # end of for loop
 
   # find which row in the df_res has the smallest RMSE value slice(which.min(Employees))
   lowest_RMSE <- df_res %>% dplyr::slice(which.min(RMSE)) %>% select(-RMSE) %>% unlist() %>% unname()
-
-  ModelC <- h2o.deeplearning(
-    model_id = paste0("DL_Classification", "_", timeframe, "M"),
-    x = names(macd_ML[,1:num_bars]),
-    y = "M_T",
-    training_frame = macd_ML,
-    activation = "Tanh",
-    overwrite_with_best_model = TRUE,
-    autoencoder = FALSE,
-    hidden = lowest_RMSE,
-    loss = "Automatic",
-    sparse = TRUE,
-    l1 = 1e-4,
-    distribution = "AUTO",
-    stopping_metric = "AUTO",
-    balance_classes = activate_balance,
-    epochs = 200)
-
-h2o.saveModel(ModelC, path = path_model, force = TRUE)
+  best_row <- which.min(df_res$RMSE)
+  
+  ## retrieve and copy/paste the best model
+  best_model_location <- file.path(path_model, best_row, m_name)
+  best_model_destination <- file.path(path_model, m_name)
+  # copy best model object
+  file.copy(best_model_location, path_model, overwrite = TRUE)
+  
 
   #h2o.shutdown(prompt = FALSE)
 
diff --git a/R/mt_stat_evaluate.R b/R/mt_stat_evaluate.R
index 2cd9e1b..969abf8 100644
--- a/R/mt_stat_evaluate.R
+++ b/R/mt_stat_evaluate.R
@@ -55,6 +55,7 @@
 #'               path_data = path_data,
 #'               activate_balance = TRUE,
 #'               num_nn_options = 3,
+#'               num_epoch = 10,
 #'               is_cluster = TRUE)
 #'
 #'
diff --git a/R/mt_stat_transf.R b/R/mt_stat_transf.R
index 2cac446..72f87d4 100644
--- a/R/mt_stat_transf.R
+++ b/R/mt_stat_transf.R
@@ -32,7 +32,6 @@
 #'
 #' @examples
 #'
-#'
 #' library(dplyr)
 #' library(stats)
 #' library(magrittr)
diff --git a/cran-comments.md b/cran-comments.md
index 93de14c..9f521b6 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,14 +1,17 @@
 ## Test environments
-* windows, R 4.0.4 (2021-02-15) -- "Lost Library Book" Platform: x86_64-w64-mingw32/x64 (64-bit)
-* R-hub builder
-* R version R 4.0.4 (2021-02-15)
-* R Under development (unstable) (2021-03-11 r80086)
+* Docker Container, R 4.1.0 (2021-05-18) -- "Camp Pontanezen"
+Platform: x86_64-pc-linux-gnu (64-bit)
+* R-hub builder 
+Platform:	Debian Linux, R-devel, GCC
+* Winbuilder
+* R version 4.1.0 (2021-05-18)
+* R Under development (unstable) (2021-06-18 r80528)
 
 ## R CMD check results
 Status: OK
-R version R 4.0.4 (2021-02-15)
+R version R 4.1.0 (2021-05-18)
 
 There were no ERRORs or WARNINGs or NOTEs
 
 ## Downstream dependencies
-R version 3.6.3 (2020-02-29)
+R version 4.0.5 (2021-03-31)
diff --git a/man/mt_make_model.Rd b/man/mt_make_model.Rd
index fdeb3df..0030360 100644
--- a/man/mt_make_model.Rd
+++ b/man/mt_make_model.Rd
@@ -6,45 +6,61 @@
 \usage{
 mt_make_model(
   indicator_dataset,
-  num_bars,
+  num_bars = 64,
   timeframe = 60,
   path_model,
   path_data,
   activate_balance = TRUE,
   num_nn_options = 24,
+  fixed_nn_struct = c(100, 100),
+  num_epoch = 100,
   is_cluster = FALSE
 )
 }
 \arguments{
-\item{indicator_dataset}{Dataframe, Dataset containing indicator patterns to train the model}
+\item{indicator_dataset}{Data frame, Data set containing indicator patterns to train the model}
 
 \item{num_bars}{Integer, Number of bars used to detect pattern}
 
-\item{timeframe}{Integer, Data timeframe in Minutes.}
+\item{timeframe}{Integer, Data time frame in minutes.}
 
 \item{path_model}{String, Path where the models are be stored}
 
-\item{path_data}{String, Path where the aggregated historical data is stored, if exists in rds format}
+\item{path_data}{String, Path where the aggregated historical data is stored,
+if exists, in rds format}
 
-\item{activate_balance}{Boolean, option to choose if to balance market type classes or not, default TRUE}
+\item{activate_balance}{Boolean, option to choose to balance market type classes or not,
+default TRUE}
 
-\item{num_nn_options}{Integer, value from 3 to 24 or more. Used to change number of variants
-of the random neural network structures. Value 3 will mean that only one
-random structure will be used. To avoid warnings make sure to set this value
+\item{num_nn_options}{Integer, value from 0 to 24 or more as multiple of 3.
+Used to change number of variants for 3 hidden layer structure.
+Random neural network structures will be generated.
+When value 0 is set then a fixed structure will be used as
+defined by parameter fixed_nn_struct.
+To avoid warnings make sure to set this value as
 multiple of 3. Higher values will increase computation time.}
 
+\item{fixed_nn_struct}{Integer vector with numeric elements, see par hidden in ?h2o.deeplearning,
+default value is c(100,100).
+Note this will only work if num_nn_options is 0}
+
+\item{num_epoch}{Integer, see parameter epochs in ?h2o.deeplearning, default value is 100
+Higher number may lead to long code execution}
+
 \item{is_cluster}{Boolean, set TRUE to use automatically clustered data}
 }
 \value{
 Function is writing file object with the model
 }
 \description{
-Function is training h2o deep learning model to match manually classified patterns of the financial
-indicator. Main idea is to be able to detect Market Type by solely relying on the current indicator pattern.
-This is in the attempt to evaluate current market type and to use proper trading strategy.
-Function will always try to gather mode data to update the model.
-
-Selected Market Periods according to the theory from Van K. Tharp:
+Function is training h2o deep learning model to match
+classified patterns of the financial indicator.
+Main idea is to be able to detect Market Type by solely relying on the
+current indicator pattern.
+This is in the attempt to evaluate current market type for trading purposes.
+
+Selected Market Periods could be manually classified
+according to the theory from Van K. Tharp:
 \enumerate{
 \item Bull normal, BUN
 \item Bull volatile, BUV
@@ -54,11 +70,16 @@ Selected Market Periods according to the theory from Van K. Tharp:
 \item Sideways volatile, RAV
 }
 
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
+For automatic classification only the BUN, BEN and RAN market types can be used.
+
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
 }
 \details{
-Function is using manually prepared dataset and tries several different random neural network structures.
-Once the best neural network is found then the better model is trained and stored.
+Function is using labeled dataset and tries several different random
+neural network structures. Once the best neural network is found then the
+better model is selected and stored. Dataset can be either manually labelled
+or generated using function mt_stat_transf(). In the latter case parameter
+is_cluster shall be set to TRUE.
 }
 \examples{
 
@@ -82,17 +103,18 @@ Sys.sleep(5)
 h2o.init(nthreads = 2)
 
 
-# performing Deep Learning Classification using the custom function manually prepared data
+# performing Deep Learning Classification using manually labelled data
 mt_make_model(indicator_dataset = macd_ML60M,
               num_bars = 64,
               timeframe = 60,
               path_model = path_model,
               path_data = path_data,
               activate_balance = TRUE,
-              num_nn_options = 3)
+              num_nn_options = 3,
+              num_epoch = 10)
 
 data(price_dataset_big)
-data <- head(price_dataset_big, 500) #reduce computational time
+data <- head(price_dataset_big, 5000) #reduce computational time
 
 ai_class <- mt_stat_transf(indicator_dataset = data,
                       num_bars = 64,
@@ -107,10 +129,23 @@ mt_make_model(indicator_dataset = ai_class,
               path_model = path_model,
               path_data = path_data,
               activate_balance = TRUE,
-              num_nn_options = 3,
+              num_nn_options = 6,
+              num_epoch = 10,
               is_cluster = TRUE)
 
-
+# performing Deep Learning Classification using the custom function auto clustered data
+# and fixed nn structure
+mt_make_model(indicator_dataset = ai_class,
+              num_bars = 64,
+              timeframe = 60,
+              path_model = path_model,
+              path_data = path_data,
+              activate_balance = TRUE,
+              num_nn_options = 0,
+              fixed_nn_struct = c(10, 10),
+              num_epoch = 10,
+              is_cluster = TRUE)
+              
 # stop h2o engine
 h2o.shutdown(prompt = FALSE)
 
diff --git a/man/mt_stat_evaluate.Rd b/man/mt_stat_evaluate.Rd
index 18906b3..1044e90 100644
--- a/man/mt_stat_evaluate.Rd
+++ b/man/mt_stat_evaluate.Rd
@@ -63,6 +63,7 @@ mt_make_model(indicator_dataset = ai_class,
               path_data = path_data,
               activate_balance = TRUE,
               num_nn_options = 3,
+              num_epoch = 10,
               is_cluster = TRUE)
 
 
diff --git a/man/mt_stat_transf.Rd b/man/mt_stat_transf.Rd
index 9968676..ee714ee 100644
--- a/man/mt_stat_transf.Rd
+++ b/man/mt_stat_transf.Rd
@@ -54,7 +54,6 @@ automated way of doing such data classification
 }
 \examples{
 
-
 library(dplyr)
 library(stats)
 library(magrittr)