diff --git a/DESCRIPTION b/DESCRIPTION index 55271aaa9..32f1574ac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,8 +54,8 @@ Suggests: FNN, formattable, future, - gbm, - glmnet, + gbm (>= 2.2.2), + glmnet (>= 4.1-6), gss, jsonlite, keras (>= 2.3.0), @@ -63,11 +63,11 @@ Suggests: knitr, ks, LiblineaR, - lightgbm (>= 4.4.0), + lightgbm (>= 4.5.0), lme4, locfit, logspline, - mboost, + mboost (>= 2.9-10), mda, mgcv, mlr3cluster, @@ -78,8 +78,8 @@ Suggests: nnet, np, param6, - partykit, - penalized, + partykit (>= 1.2-21), + penalized (>= 0.9-52), pendensity, plugdensity, pracma, @@ -87,8 +87,8 @@ Suggests: pseudo, randomForest, randomPlantedForest, - randomForestSRC, - ranger, + randomForestSRC (>= 3.3.0), + ranger (>= 0.16.0), remotes, reticulate (>= 1.16), rpart, @@ -101,10 +101,10 @@ Suggests: stats, survival, survivalmodels (>= 0.1.19), - survivalsvm, + survivalsvm (>= 0.0.5), tensorflow (>= 2.0.0), testthat, - xgboost + xgboost (>= 1.7.8.1) Remotes: binderh/CoxBoost, catboost/catboost/catboost/R-package, diff --git a/NEWS.md b/NEWS.md index 1066bdc15..f471e9c43 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,20 @@ # dev +* Add "Prediction types" doc section for all 30 survival learners + make sure it is consistent #347 +* All survival learners have `crank` as main prediction type (and it is always returned) #331 +* Added minimum working version for all survival learners in `DESCRIPTION` file +* Harmonized the use of time points for prediction as much as possible across survival learners #387 + * added `gridify_times()` function to coarsen time points + * fixed `surv.parametric` and `surv.akritas` use of `ntime` argument +* `surv.parametric` is now used by default with `discrete = TRUE` (no survival learner now returns a `distr6` vectorized distribution by default) +* Doc update for `mlr3` (version `0.21.0`) +* Fixed custom and initial values across all learners documentation pages +* Fixed doc examples that used `learner$importance()` +* Set `n_thread = 1` for
`surv.aorsf` and use unique event time points for predicted `S(t)` +* Add `selected_features()` for `surv.penalized` +* Fix `surv.prioritylasso` learner + add `distr` predictions via Breslow #344 +* Survival SVM `gamma.mu` parameter was split into `gamma` and `mu` to enable easier tuning (`surv.svm` learner) + # mlr3extralearners 0.9.0 * Added response (i.e., survival time) prediction to `aorsf` learner diff --git a/R/helpers.R b/R/helpers.R index 04d03dcc7..9cf79d3ee 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -119,3 +119,15 @@ rename = function(x, old, new) { } x } + +# coerce given time points to an `ntime` grid if `ntime` is not NULL, +# otherwise just return the sorted unique time points +gridify_times = function(times, ntime) { + times = sort(unique(times)) + if (!is.null(ntime)) { + indx = unique(round(seq.int(1, length(times), length.out = ntime))) + times = times[indx] + } + + times +} diff --git a/R/learner_BART_surv_bart.R b/R/learner_BART_surv_bart.R index 0318f6dc0..b37f80004 100644 --- a/R/learner_BART_surv_bart.R +++ b/R/learner_BART_surv_bart.R @@ -6,11 +6,12 @@ #' Fits a Bayesian Additive Regression Trees (BART) learner to right-censored #' survival data. Calls [BART::mc.surv.bart()] from \CRANpkg{BART}. #' -#' @details -#' Two types of prediction are returned for this learner: +#' @section Prediction types: +#' This learner returns two prediction types: #' 1. `distr`: a 3d survival array with observations as 1st dimension, time #' points as 2nd and the posterior draws as 3rd dimension. -#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return]. The parameter +#' Calculated using the internal `predict.survbart()` function. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. The parameter #' `which.curve` decides which posterior draw (3rd dimension) will be used for the #' calculation of the expected mortality.
Note that the median posterior is #' by default used for the calculation of survival measures that require a `distr` diff --git a/R/learner_CoxBoost_surv_coxboost.R b/R/learner_CoxBoost_surv_coxboost.R index 2405f6b30..76cb4b7dc 100644 --- a/R/learner_CoxBoost_surv_coxboost.R +++ b/R/learner_CoxBoost_surv_coxboost.R @@ -6,6 +6,15 @@ #' Fit a Survival Cox model with a likelihood based boosting algorithm. #' Calls [CoxBoost::CoxBoost()] from package 'CoxBoost'. #' +#' @section Prediction types: +#' This learner returns three prediction types, using the internal `predict.CoxBoost()` function: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a 2d survival matrix, with observations as rows and time points +#' as columns. The internal transformation uses the Breslow estimator to compute +#' the baseline hazard and compose the survival distributions from the `lp` predictions. +#' #' @template learner #' @templateVar id surv.coxboost #' @@ -18,15 +27,6 @@ #' multiple hyperparameters, \CRANpkg{mlr3tuning} and [LearnerSurvCoxboost] will likely give better #' results. #' -#' Three prediction types are returned for this learner, using the internal -#' `predict.CoxBoost()` function: -#' 1. `lp`: a vector of linear predictors (relative risk scores), one per -#' observation. -#' 2. `crank`: same as `lp`. -#' 3. `distr`: a 2d survival matrix, with observations as rows and time points -#' as columns. The internal transformation uses the Breslow estimator to compose -#' the survival distributions from the `lp` predictions. 
-#' #' @references #' `r format_bib("binder2009boosting")` #' @@ -60,7 +60,7 @@ LearnerSurvCoxboost = R6Class("LearnerSurvCoxboost", id = "surv.coxboost", packages = c("mlr3extralearners", "CoxBoost", "pracma"), feature_types = c("integer", "numeric"), - predict_types = c("distr", "crank", "lp"), + predict_types = c("crank", "lp", "distr"), param_set = ps, properties = c("weights", "selected_features"), man = "mlr3extralearners::mlr_learners_surv.coxboost", @@ -126,16 +126,16 @@ LearnerSurvCoxboost = R6Class("LearnerSurvCoxboost", .args = pars, type = "lp")) + # all the unique training time points + times = sort(unique(self$model$time)) surv = invoke(predict, self$model, newdata = newdata, .args = pars, type = "risk", - times = sort(unique(self$model$time))) + times = times) - mlr3proba::.surv_return(times = sort(unique(self$model$time)), - surv = surv, - lp = lp) + mlr3proba::.surv_return(times = times, surv = surv, lp = lp) } ) ) diff --git a/R/learner_CoxBoost_surv_cv_coxboost.R b/R/learner_CoxBoost_surv_cv_coxboost.R index 8e5a869c8..0edee3773 100644 --- a/R/learner_CoxBoost_surv_cv_coxboost.R +++ b/R/learner_CoxBoost_surv_cv_coxboost.R @@ -3,10 +3,11 @@ #' @name mlr_learners_surv.cv_coxboost #' #' @description -#' Fits a survival Cox model using likelihood based boosting and interal cross-validation for the +#' Fits a survival Cox model using likelihood based boosting and internal cross-validation for the #' number of steps. #' Calls [CoxBoost::CoxBoost()] or [CoxBoost::cv.CoxBoost()] from package 'CoxBoost'. #' +#' @inheritSection mlr_learners_surv.coxboost Prediction types #' @template learner #' @templateVar id surv.cv_coxboost #' @@ -22,15 +23,6 @@ #' If `penalty == "optimCoxBoostPenalty"` then [CoxBoost::optimCoxBoostPenalty] is used to determine #' the penalty value to be used in [CoxBoost::cv.CoxBoost]. #' -#' Three prediction types are returned for this learner, using the internal -#' `predict.CoxBoost()` function: -#' 1. 
`lp`: a vector of linear predictors (relative risk scores), one per -#' observation. -#' 2. `crank`: same as `lp`. -#' 2. `distr`: a 2d survival matrix, with observations as rows and time points -#' as columns. The internal transformation uses the Breslow estimator to compose -#' the survival distributions from the `lp` predictions. -#' #' @references #' `r format_bib("binder2009boosting")` #' @@ -77,7 +69,7 @@ LearnerSurvCVCoxboost = R6Class("LearnerSurvCVCoxboost", id = "surv.cv_coxboost", packages = c("mlr3extralearners", "CoxBoost", "pracma"), feature_types = c("integer", "numeric"), - predict_types = c("distr", "crank", "lp"), + predict_types = c("crank", "lp", "distr"), param_set = ps, properties = c("weights", "selected_features"), man = "mlr3extralearners::mlr_learners_surv.cv_coxboost", @@ -189,16 +181,16 @@ LearnerSurvCVCoxboost = R6Class("LearnerSurvCVCoxboost", .args = pars, type = "lp")) + # all the unique training time points + times = sort(unique(self$model$time)) surv = invoke(predict, self$model, newdata = newdata, .args = pars, type = "risk", - times = sort(unique(self$model$time))) + times = times) - mlr3proba::.surv_return(times = sort(unique(self$model$time)), - surv = surv, - lp = lp) + mlr3proba::.surv_return(times = times, surv = surv, lp = lp) } ) ) diff --git a/R/learner_RWeka_classif_LMT.R b/R/learner_RWeka_classif_LMT.R index 42ec26cff..9a34624e2 100644 --- a/R/learner_RWeka_classif_LMT.R +++ b/R/learner_RWeka_classif_LMT.R @@ -9,7 +9,7 @@ #' @template learner #' @templateVar id classif.LMT #' -#' @section CUstom mlr3 parameters: +#' @section Custom mlr3 parameters: #' - `output_debug_info`: #' - original id: output-debug-info #' diff --git a/R/learner_abess_classif_abess.R b/R/learner_abess_classif_abess.R index bda8c307e..a1cdd5195 100644 --- a/R/learner_abess_classif_abess.R +++ b/R/learner_abess_classif_abess.R @@ -10,13 +10,10 @@ #' @template learner #' #' @section Initial parameter values: -#' * `num.threads`: This parameter is 
initialized to 1 (default is 0) to avoid conflicts with the mlr3 parallelization. -#' -#' @section Custom mlr3 parameters: -#' * `family` - Depending on the task type, if the parameter `family` is `NULL`, it is set to `"binomial"` for binary +#' - `num.threads`: This parameter is initialized to 1 (default is 0) to avoid conflicts with the mlr3 parallelization. +#' - `family`: Depends on the task type, if the parameter `family` is `NULL`, it is set to `"binomial"` for binary #' classification tasks and to `"multinomial"` for multiclass classification problems. #' -#' #' @template seealso_learner #' @template example #' @export diff --git a/R/learner_aorsf_surv_aorsf.R b/R/learner_aorsf_surv_aorsf.R index b123616c5..69628508f 100644 --- a/R/learner_aorsf_surv_aorsf.R +++ b/R/learner_aorsf_surv_aorsf.R @@ -9,13 +9,17 @@ #' principle deal with missing values, the behaviour has to be configured using #' the parameter `na_action`. #' -#' @details +#' @section Initial parameter values: +#' * `n_thread`: This parameter is initialized to 1 (default is 0) to avoid conflicts with the mlr3 parallelization. +#' +#' @section Prediction types: #' This learner returns three prediction types: #' 1. `distr`: a survival matrix in two dimensions, where observations are #' represented in rows and (unique event) time points in columns. +#' Calculated using the internal `predict.ObliqueForest()` function. #' 2. `response`: the restricted mean survival time of each test observation, #' derived from the survival matrix prediction (`distr`). -#' 3. `crank`: the expected mortality using [mlr3proba::.surv_return]. +#' 3. `crank`: the expected mortality using [mlr3proba::.surv_return()]. #' #' @template learner #' @templateVar id surv.aorsf @@ -27,10 +31,7 @@ #' Note that `mtry` and `mtry_ratio` are mutually exclusive. 
#' #' @references -#' `r format_bib("jaeger_2019")` -#' -#' `r format_bib("jaeger_2022")` -#' +#' `r format_bib("jaeger_2019", "jaeger_2022")` #' #' @template seealso_learner #' @template example @@ -45,7 +46,7 @@ LearnerSurvAorsf = R6Class("LearnerSurvAorsf", n_tree = p_int(default = 500L, lower = 1L, tags = "train"), n_split = p_int(default = 5L, lower = 1L, tags = "train"), n_retry = p_int(default = 3L, lower = 0L, tags = "train"), - n_thread = p_int(default = 0, lower = 0, tags = c("train", "predict")), + n_thread = p_int(default = 0, lower = 0, tags = c("train", "predict", "threads")), pred_aggregate = p_lgl(default = TRUE, tags = "predict"), pred_simplify = p_lgl(default = FALSE, tags = "predict"), oobag = p_lgl(default = FALSE, tags = 'predict'), @@ -81,6 +82,8 @@ LearnerSurvAorsf = R6Class("LearnerSurvAorsf", verbose_progress = p_lgl(default = FALSE, tags = "train"), na_action = p_fct(levels = c("fail", "omit", "impute_meanmode"), default = "fail", tags = "train")) + ps$values = list(n_thread = 1) + super$initialize( id = "surv.aorsf", packages = c("mlr3extralearners", "aorsf", "pracma"), @@ -177,7 +180,8 @@ LearnerSurvAorsf = R6Class("LearnerSurvAorsf", }, .predict = function(task) { pv = self$param_set$get_values(tags = "predict") - utime = task$unique_event_times() # increasing by default + # estimate S(t) on the unique event times from the train set + utime = self$model$event_times surv = mlr3misc::invoke(predict, self$model, new_data = ordered_features(task, self), diff --git a/R/learner_dbarts_regr_bart.R b/R/learner_dbarts_regr_bart.R index 748be7083..0f93d3ef0 100644 --- a/R/learner_dbarts_regr_bart.R +++ b/R/learner_dbarts_regr_bart.R @@ -10,11 +10,11 @@ #' @templateVar id regr.bart #' #' @section Custom mlr3 parameters: -#' * Parameter: offset +#' * Parameter: `offset` #' * The parameter is removed, because only `dbarts::bart2` allows an offset during training, #' and therefore the offset parameter in `dbarts:::predict.bart` is irrelevant for #' 
`dbarts::dbart`. -#' * Parameter: nchain, combineChains, combinechains +#' * Parameter: `nchain`, `combineChains`, `combinechains` #' * The parameters are removed as parallelization of multiple models is handled by future. #' #' @section Initial parameter values: diff --git a/R/learner_flexsurv_surv_flexible.R b/R/learner_flexsurv_surv_flexible.R index 2d40dc843..87a9ffc79 100644 --- a/R/learner_flexsurv_surv_flexible.R +++ b/R/learner_flexsurv_surv_flexible.R @@ -9,14 +9,14 @@ #' @template learner #' @templateVar id surv.flexible #' -#' @details -#' This learner returns two prediction types: -#' 1. `lp`: a vector of linear predictors (relative risk scores), for each test -#' observation. +#' @section Prediction types: +#' This learner returns three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. #' Calculated using [flexsurv::flexsurvspline()] and the estimated coefficients. -#' For fitted coefficients, \eqn{\beta = (\beta_0,...,\beta_P)}, -#' and covariates \eqn{X^T = (X_0,...,X_P)^T}, where \eqn{X_0}{X0} -#' is a column of \eqn{1}s, the linear predictor (`lp`) is \eqn{lp = \beta X}. +#' For fitted coefficients, \eqn{\hat{\beta} = (\hat{\beta_0},...,\hat{\beta_P})}, +#' and the test data covariates \eqn{X^T = (X_0,...,X_P)^T}, where \eqn{X_0}{X0} +#' is a column of \eqn{1}s, the linear predictor vector is \eqn{lp = \hat{\beta} X^T}. #' 2. `distr`: a survival matrix in two dimensions, where observations are #' represented in rows and time points in columns. #' Calculated using `predict.flexsurvreg()`. @@ -111,8 +111,8 @@ predict_flexsurvreg = function(object, task, learner, ...) 
{ } X = stats::model.matrix(formulate(rhs = task$feature_names), - data = newdata, - xlev = task$levels()) + data = newdata, + xlev = task$levels()) # collect the auxiliary arguments for the fitted object args = object$aux diff --git a/R/learner_gbm_surv_gbm.R b/R/learner_gbm_surv_gbm.R index 62109438b..ff200476b 100644 --- a/R/learner_gbm_surv_gbm.R +++ b/R/learner_gbm_surv_gbm.R @@ -6,13 +6,19 @@ #' Gradient Boosting for Survival Analysis. #' Calls [gbm::gbm()] from \CRANpkg{gbm}. #' +#' @section Prediction types: +#' This learner returns two prediction types, using the internal `predict.gbm()` function: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' 2. `crank`: same as `lp`. +#' #' @templateVar id surv.gbm #' @template learner #' -#' @section Parameter changes: +#' @section Initial parameter values: #' - `distribution`: -#' - Actual default: "bernoulli" -#' - Adjusted default: "coxph" +#' - Actual default: `"bernoulli"` +#' - Adjusted default: `"coxph"` #' - Reason for change: This is the only distribution available for survival. #' - `keep.data`: #' - Actual default: TRUE @@ -20,9 +26,9 @@ #' - Reason for change: `keep.data = FALSE` saves memory during model fitting. #' - `n.cores`: #' - Actual default: NULL -#' - Adjusted default: 1 +#' - Adjusted default: `1` #' - Reason for change: Suppressing the automatic internal parallelization if -#' `cv.folds` > 0. +#' `cv.folds` > 0 and avoid threading conflicts with \CRANpkg{future}. 
#' #' @references #' `r format_bib("friedman2002stochastic")` @@ -82,9 +88,7 @@ LearnerSurvGBM = R6Class("LearnerSurvGBM", private = list( .train = function(task) { - # hacky formula construction as gbm fails when "type" argument specified in Surv() - tn = task$target_names lhs = sprintf("Surv(%s, %s)", tn[1L], tn[2L]) f = formulate(lhs, task$feature_names, env = getNamespace("survival")) diff --git a/R/learner_glmnet_surv_cv_glmnet.R b/R/learner_glmnet_surv_cv_glmnet.R index ba087703c..189d58370 100644 --- a/R/learner_glmnet_surv_cv_glmnet.R +++ b/R/learner_glmnet_surv_cv_glmnet.R @@ -6,20 +6,21 @@ #' Generalized linear models with elastic net regularization. #' Calls [glmnet::cv.glmnet()] from package \CRANpkg{glmnet}. #' -#' @section Custom mlr3 parameters: +#' @section Initial parameter values: #' - `family` is set to `"cox"` and cannot be changed. #' -#' @details -#' This learner returns two prediction types: -#' 1. `lp`: a vector of linear predictors (relative risk scores), one per -#' observation. +#' @section Prediction types: +#' This learner returns three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. #' Calculated using [glmnet::predict.cv.glmnet()]. -#' 2. `distr`: a survival matrix in two dimensions, where observations are +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are #' represented in rows and time points in columns. #' Calculated using [glmnet::survfit.cv.glmnet()]. #' Parameters `stype` and `ctype` relate to how `lp` predictions are transformed #' into survival predictions and are described in [survival::survfit.coxph()]. -#' By default the Breslow estimator is used. +#' By default the Breslow estimator is used for computing the baseline hazard. 
#' #' @templateVar id surv.cv_glmnet #' @template learner diff --git a/R/learner_glmnet_surv_glmnet.R b/R/learner_glmnet_surv_glmnet.R index 115e686d7..6a9d521f1 100644 --- a/R/learner_glmnet_surv_glmnet.R +++ b/R/learner_glmnet_surv_glmnet.R @@ -6,20 +6,21 @@ #' Generalized linear models with elastic net regularization. #' Calls [glmnet::glmnet()] from package \CRANpkg{glmnet}. #' -#' @section Custom mlr3 parameters: +#' @section Initial parameter values: #' - `family` is set to `"cox"` and cannot be changed. #' -#' @details -#' This learner returns two prediction types: -#' 1. `lp`: a vector of linear predictors (relative risk scores), one per -#' observation. +#' @section Prediction types: +#' This learner returns three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. #' Calculated using [glmnet::predict.coxnet()]. -#' 2. `distr`: a survival matrix in two dimensions, where observations are +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are #' represented in rows and time points in columns. #' Calculated using [glmnet::survfit.coxnet()]. #' Parameters `stype` and `ctype` relate to how `lp` predictions are transformed #' into survival predictions and are described in [survival::survfit.coxph()]. -#' By default the Breslow estimator is used. +#' By default the Breslow estimator is used for computing the baseline hazard. #' #' **Caution**: This learner is different to learners calling [glmnet::cv.glmnet()] #' in that it does not use the internal optimization of parameter `lambda`. 
diff --git a/R/learner_lightgbm_classif_lightgbm.R b/R/learner_lightgbm_classif_lightgbm.R index f9da073bf..79f9de0f9 100644 --- a/R/learner_lightgbm_classif_lightgbm.R +++ b/R/learner_lightgbm_classif_lightgbm.R @@ -20,10 +20,9 @@ #' * Actual default: 1L #' * Initial value: -1L #' * Reason for change: Prevents accidental conflicts with mlr messaging system. -#' -#' @section Custom mlr3 defaults: #' * `objective`: -#' Depending if the task is binary / multiclass, the default is `"binary"` or `"multiclasss"`. +#' * Depends on the task: if binary classification, then this parameter is set to +#' `"binary"`, otherwise `"multiclass"`, and cannot be changed. #' #' @section Custom mlr3 parameters: #' * `num_class`: @@ -91,6 +90,7 @@ LearnerClassifLightGBM = R6Class("LearnerClassifLightGBM", neg_bagging_fraction = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"), bagging_freq = p_int(default = 0L, lower = 0L, tags = "train"), bagging_seed = p_int(default = 3L, tags = "train"), + bagging_by_query = p_lgl(default = FALSE, tags = "train"), feature_fraction = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"), feature_fraction_bynode = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"), feature_fraction_seed = p_int(default = 2L, tags = "train"), diff --git a/R/learner_lightgbm_regr_lightgbm.R b/R/learner_lightgbm_regr_lightgbm.R index 9872ca1ee..4b8fb8e73 100644 --- a/R/learner_lightgbm_regr_lightgbm.R +++ b/R/learner_lightgbm_regr_lightgbm.R @@ -78,6 +78,7 @@ LearnerRegrLightGBM = R6Class("LearnerRegrLightGBM", bagging_fraction = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"), bagging_freq = p_int(default = 0L, lower = 0L, tags = "train"), bagging_seed = p_int(default = 3L, tags = "train"), + bagging_by_query = p_lgl(default = FALSE, tags = "train"), feature_fraction = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"), feature_fraction_bynode = p_dbl(default = 1.0, lower = 0.0, upper = 1.0, tags = "train"),
feature_fraction_seed = p_int(default = 2L, tags = "train"), diff --git a/R/learner_mboost_regr_gamboost.R b/R/learner_mboost_regr_gamboost.R index 2e82153ed..629dc3786 100644 --- a/R/learner_mboost_regr_gamboost.R +++ b/R/learner_mboost_regr_gamboost.R @@ -15,8 +15,9 @@ #' @export #' @template seealso_learner #' @examples +#' # Define the Learner #' learner = lrn("regr.gamboost", baselearner = "bols") -#' learner +#' print(learner) LearnerRegrGAMBoost = R6Class("LearnerRegrGAMBoost", inherit = LearnerRegr, public = list( diff --git a/R/learner_mboost_surv_blackboost.R b/R/learner_mboost_surv_blackboost.R index 00b3f08d9..be013722e 100644 --- a/R/learner_mboost_surv_blackboost.R +++ b/R/learner_mboost_surv_blackboost.R @@ -6,6 +6,20 @@ #' Gradient boosting with regression trees for survival analysis. #' Calls [mboost::blackboost()] from \CRANpkg{mboost}. #' +#' @section Prediction types: +#' This learner returns two to three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' Calculated using [mboost::predict.blackboost()]. +#' If the `family` parameter is not `"coxph"`, `-lp` is returned, since non-coxph +#' families represent AFT-style distributions where lower `lp` values indicate higher risk. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using [mboost::survFit()]. +#' This prediction type is present only when the `family` distribution parameter +#' is equal to `"coxph"` (default). +#' By default the Breslow estimator is used for computing the baseline hazard. 
#' #' @template learner #' @templateVar id surv.blackboost @@ -87,7 +101,7 @@ LearnerSurvBlackBoost = R6Class("LearnerSurvBlackBoost", id = "surv.blackboost", param_set = ps, feature_types = c("integer", "numeric", "factor"), - predict_types = c("distr", "crank", "lp"), + predict_types = c("crank", "lp", "distr"), properties = "weights", packages = c("mlr3extralearners", "mboost", "pracma"), man = "mlr3extralearners::mlr_learners_surv.blackboost", @@ -171,29 +185,22 @@ LearnerSurvBlackBoost = R6Class("LearnerSurvBlackBoost", pars = self$param_set$get_values(tags = "predict") newdata = ordered_features(task, self) # predict linear predictor - lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", - .args = pars - )) + lp = as.numeric( + invoke(predict, + self$model, + newdata = newdata, + type = "link", + .args = pars) + ) # predict survival if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + # uses Breslow estimator internally survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) + mlr3proba::.surv_return(times = survfit$time, surv = t(survfit$surv), lp = lp) } else { mlr3proba::.surv_return(lp = -lp) } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } } ) ) diff --git a/R/learner_mboost_surv_gamboost.R b/R/learner_mboost_surv_gamboost.R index 5bac8de86..bf76820a5 100644 --- a/R/learner_mboost_surv_gamboost.R +++ b/R/learner_mboost_surv_gamboost.R @@ -9,15 +9,26 @@ #' @template learner #' @templateVar id surv.gamboost #' -#' @details -#' `distr` prediction made by [mboost::survFit()]. +#' @section Prediction types: +#' This learner returns two to three prediction types: +#' 1. 
`lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' Calculated using [mboost::predict.gamboost()]. +#' If the `family` parameter is not `"coxph"`, `-lp` is returned, since non-coxph +#' families represent AFT-style distributions where lower `lp` values indicate higher risk. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using [mboost::survFit()]. +#' This prediction type is present only when the `family` distribution parameter +#' is equal to `"coxph"` (default). +#' By default the Breslow estimator is used for computing the baseline hazard. #' #' @references #' `r format_bib("buhlmann2003boosting")` #' #' @export #' @template seealso_learner -#' @examples #' @template simple_example LearnerSurvGAMBoost = R6Class("LearnerSurvGAMBoost", inherit = mlr3proba::LearnerSurv, @@ -58,7 +69,7 @@ LearnerSurvGAMBoost = R6Class("LearnerSurvGAMBoost", id = "surv.gamboost", param_set = ps, feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank", "lp"), + predict_types = c("crank", "lp", "distr"), properties = c("weights", "importance", "selected_features"), packages = c("mlr3extralearners", "mboost", "pracma"), man = "mlr3extralearners::mlr_learners_surv.gamboost", @@ -139,7 +150,6 @@ LearnerSurvGAMBoost = R6Class("LearnerSurvGAMBoost", pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] pars = pars[!(names(pars) %in% c("family", "custom.family"))] - with_package("mboost", { invoke(mboost::gamboost, formula = task$formula(task$feature_names), @@ -151,30 +161,24 @@ LearnerSurvGAMBoost = R6Class("LearnerSurvGAMBoost", pars = self$param_set$get_values(tags = "predict") newdata = ordered_features(task, self) # predict linear predictor - lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", - .args 
= pars - )) + lp = as.numeric( + invoke(predict, + self$model, + newdata = newdata, + type = "link", + .args = pars) + ) # predict survival if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + # uses Breslow estimator internally survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) + mlr3proba::.surv_return(times = survfit$time, surv = t(survfit$surv), lp = lp) } else { mlr3proba::.surv_return(lp = -lp) } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } } ) ) + .extralrns_dict$add("surv.gamboost", LearnerSurvGAMBoost) diff --git a/R/learner_mboost_surv_glmboost.R b/R/learner_mboost_surv_glmboost.R index 102ac302d..8bf86070b 100644 --- a/R/learner_mboost_surv_glmboost.R +++ b/R/learner_mboost_surv_glmboost.R @@ -9,18 +9,20 @@ #' @template learner #' @templateVar id surv.glmboost #' -#' @details -#' This learner returns up to three prediction types: -#' 1. `crank`: same as `lp`. -#' 2. `lp`: a vector of linear predictors (relative risk scores), one per -#' observation. +#' @section Prediction types: +#' This learner returns two to three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. #' Calculated using [mboost::predict.glmboost()]. -#' 3. `distr`: a survival matrix in two dimensions, where rows are observations -#' and columns are the time points. -#' This predict type is returned only when the `family` parameter is set to -#' `"coxph"` (which is the default). -#' Calculated using [mboost::survFit()] which uses the Breslow estimator for the -#' baseline hazard function. 
+#' If the `family` parameter is not `"coxph"`, `-lp` is returned, since non-coxph +#' families represent AFT-style distributions where lower `lp` values indicate higher risk. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using [mboost::survFit()]. +#' This prediction type is present only when the `family` distribution parameter +#' is equal to `"coxph"` (default). +#' By default the Breslow estimator is used for computing the baseline hazard. #' #' @references #' `r format_bib("buhlmann2003boosting")` @@ -209,9 +211,12 @@ LearnerSurvGLMBoost = R6Class("LearnerSurvGLMBoost", # predict linear predictor pars = self$param_set$get_values(tags = "predict") lp = as.numeric( - invoke(predict, self$model, newdata = newdata, type = "link", - .args = pars - )) + invoke(predict, + self$model, + newdata = newdata, + type = "link", + .args = pars) + ) # predict survival if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { diff --git a/R/learner_mboost_surv_mboost.R b/R/learner_mboost_surv_mboost.R index 73d5b92b9..a89d525d1 100644 --- a/R/learner_mboost_surv_mboost.R +++ b/R/learner_mboost_surv_mboost.R @@ -6,6 +6,21 @@ #' Model-based boosting for survival analysis. #' Calls [mboost::mboost()] from \CRANpkg{mboost}. #' +#' @section Prediction types: +#' This learner returns two to three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' Calculated using [mboost::predict.mboost()]. +#' If the `family` parameter is not `"coxph"`, `-lp` is returned, since non-coxph +#' families represent AFT-style distributions where lower `lp` values indicate higher risk. +#' 2. `crank`: same as `lp`. +#' 3. 
`distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using [mboost::survFit()]. +#' This prediction type is present only when the `family` distribution parameter +#' is equal to `"coxph"` (default). +#' By default the Breslow estimator is used for computing the baseline hazard. +#' #' @template learner #' @templateVar id surv.mboost #' @@ -57,7 +72,7 @@ LearnerSurvMBoost = R6Class("LearnerSurvMBoost", id = "surv.mboost", param_set = ps, feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank", "lp"), + predict_types = c("crank", "lp", "distr"), properties = c("weights", "importance", "selected_features"), packages = c("mlr3extralearners", "mboost"), man = "mlr3extralearners::mlr_learners_surv.mboost", @@ -145,30 +160,18 @@ LearnerSurvMBoost = R6Class("LearnerSurvMBoost", }, .predict = function(task) { - newdata = ordered_features(task, self) # predict linear predictor lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link")) # predict survival if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + # uses Breslow estimator internally survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) + mlr3proba::.surv_return(times = survfit$time, surv = t(survfit$surv), lp = lp) } else { mlr3proba::.surv_return(lp = -lp) } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } } ) ) diff --git a/R/learner_mgcv_classif_gam.R b/R/learner_mgcv_classif_gam.R index fb602ea61..a2919b593 100644 --- a/R/learner_mgcv_classif_gam.R +++ b/R/learner_mgcv_classif_gam.R @@ -22,13 +22,13 @@ #' @references #' `r 
format_bib("hastie2017generalized", "wood2012mgcv")` #' -#' @examples +#' @examplesIf requireNamespace("mgcv", quietly = TRUE) #' # simple example #' t = mlr3::tsk("spam")$filter(1:1000) #' l = mlr3::lrn("classif.gam") #' l$param_set$set_values( -#' formula = type ~ s(george, k = 3) + s(charDollar, k = 3) + s(edu) -#' ') +#' formula = type ~ s(george, k = 3) + s(charDollar, k = 3) + s(edu) +#' ) #' l$train(t) #' l$model #' @export diff --git a/R/learner_mgcv_regr_gam.R b/R/learner_mgcv_regr_gam.R index 182309ee4..bc6b8b9cb 100644 --- a/R/learner_mgcv_regr_gam.R +++ b/R/learner_mgcv_regr_gam.R @@ -19,8 +19,7 @@ #' @references #' `r format_bib("hastie2017generalized", "wood2012mgcv")` #' -#' @examples -#' +#' @examplesIf requireNamespace("mgcv", quietly = TRUE) #' # simple example #' t = mlr3::tsk("mtcars") #' l = mlr3::lrn("regr.gam") diff --git a/R/learner_partykit_classif_mob.R b/R/learner_partykit_classif_mob.R index 9b09e2ebe..7a52138be 100644 --- a/R/learner_partykit_classif_mob.R +++ b/R/learner_partykit_classif_mob.R @@ -14,7 +14,7 @@ #' #' @export #' @template seealso_learner -#' @examples +#' @examplesIf requireNamespace("partykit", quietly = TRUE) #' library(mlr3) #' logit_ = function(y, x, start = NULL, weights = NULL, offset = NULL, ...) { #' glm(y ~ 1, family = binomial, start = start, ...) diff --git a/R/learner_partykit_regr_mob.R b/R/learner_partykit_regr_mob.R index 7a48f482b..7d5a1c55c 100644 --- a/R/learner_partykit_regr_mob.R +++ b/R/learner_partykit_regr_mob.R @@ -14,7 +14,7 @@ #' #' @export #' @template seealso_learner -#' @examples +#' @examplesIf requireNamespace("partykit", quietly = TRUE) #' library(mlr3) #' lm_ = function(y, x, start = NULL, weights = NULL, offset = NULL, ...) { #' lm(y ~ 1, ...) 
diff --git a/R/learner_partykit_surv_cforest.R b/R/learner_partykit_surv_cforest.R index 30bdcffd6..933ef309e 100644 --- a/R/learner_partykit_surv_cforest.R +++ b/R/learner_partykit_surv_cforest.R @@ -6,11 +6,22 @@ #' A random forest based on conditional inference trees ([ctree][partykit::ctree]). #' Calls [partykit::cforest()] from \CRANpkg{partykit}. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [partykit::predict.cforest()] function. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. +#' #' @template learner #' @templateVar id surv.cforest #' #' @inheritSection mlr_learners_classif.cforest Custom mlr3 parameters #' +#' @section Initial parameter values: +#' * `cores`: This parameter is initialized to 1 (default is `NULL`) to avoid +#' threading conflicts with \CRANpkg{future}. 
+#' #' @references #' `r format_bib("hothorn_2015", "hothorn_2006")` #' @@ -31,14 +42,12 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest", ps = ps( ntree = p_int(default = 500L, lower = 1L, tags = "train"), replace = p_lgl(default = FALSE, tags = "train"), - fraction = p_dbl(default = 0.632, lower = 0, upper = 1, - tags = "train"), - mtry = p_int(lower = 0L, special_vals = list(Inf), - tags = "train"), # default actually "ceiling(sqrt(nvar))" + fraction = p_dbl(default = 0.632, lower = 0, upper = 1, tags = "train"), + # mtry default is actually `ceiling(sqrt(nvar))` + mtry = p_int(lower = 0L, special_vals = list(Inf), tags = "train"), mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"), applyfun = p_uty(tags = c("train", "importance")), - cores = p_int(default = NULL, special_vals = list(NULL), - tags = c("train", "importance", "threads")), + cores = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "importance", "threads")), trace = p_lgl(default = FALSE, tags = "train"), offset = p_uty(tags = "train"), cluster = p_uty(tags = "train"), @@ -46,33 +55,28 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest", scores = p_uty(tags = "train"), teststat = p_fct(default = "quadratic", - levels = c("quadratic", "maximum"), tags = "train"), + levels = c("quadratic", "maximum"), tags = "train"), splitstat = p_fct(default = "quadratic", - levels = c("quadratic", "maximum"), tags = "train"), + levels = c("quadratic", "maximum"), tags = "train"), splittest = p_lgl(default = FALSE, tags = "train"), testtype = p_fct(default = "Univariate", - levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), - tags = "train"), + levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), + tags = "train"), nmax = p_uty(tags = "train"), - alpha = p_dbl(default = 0.05, lower = 0, upper = 1, - tags = "train"), - mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, - tags = "train"), + alpha = p_dbl(default = 0.05, lower = 0, upper = 1, 
tags = "train"), + mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, tags = "train"), logmincriterion = p_dbl(default = log(0.95), tags = "train"), minsplit = p_int(lower = 1L, default = 20L, tags = "train"), minbucket = p_int(lower = 1L, default = 7L, tags = "train"), - minprob = p_dbl(default = 0.01, lower = 0, upper = 1, - tags = "train"), + minprob = p_dbl(default = 0.01, lower = 0, upper = 1, tags = "train"), stump = p_lgl(default = FALSE, tags = "train"), lookahead = p_lgl(default = FALSE, tags = "train"), MIA = p_lgl(default = FALSE, tags = "train"), nresample = p_int(default = 9999L, lower = 1L, tags = "train"), - tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, - tags = "train"), + tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, tags = "train"), maxsurrogate = p_int(default = 0L, lower = 0L, tags = "train"), numsurrogate = p_lgl(default = FALSE, tags = "train"), - maxdepth = p_int(default = Inf, lower = 0L, - special_vals = list(Inf), tags = "train"), + maxdepth = p_int(default = Inf, lower = 0L, special_vals = list(Inf), tags = "train"), multiway = p_lgl(default = FALSE, tags = "train"), splittry = p_int(default = 2L, lower = 0L, tags = "train"), intersplit = p_lgl(default = FALSE, tags = "train"), @@ -80,8 +84,7 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest", caseweights = p_lgl(default = TRUE, tags = "train"), saveinfo = p_lgl(default = FALSE, tags = "train"), update = p_lgl(default = FALSE, tags = "train"), - splitflavour = p_fct(default = "ctree", - levels = c("ctree", "exhaustive"), tags = "train"), + splitflavour = p_fct(default = "ctree", levels = c("ctree", "exhaustive"), tags = "train"), maxvar = p_int(lower = 1L, tags = "train"), # predict; missing FUN and simplify (not needed here) @@ -101,8 +104,7 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest", releps = p_dbl(default = 0, lower = 0, tags = "train") ) - ps$add_dep("nresample", on = "testtype", - cond = CondEqual$new("MonteCarlo")) + 
ps$add_dep("nresample", on = "testtype", cond = CondEqual$new("MonteCarlo")) # ps$add_dep("nperm", on = "conditional", cond = CondEqual$new(TRUE)) # ps$add_dep("threshold", on = "conditional", cond = CondEqual$new(TRUE)) @@ -112,11 +114,14 @@ LearnerSurvCForest = R6Class("LearnerSurvCForest", ps$values$mincriterion = 0 ps$values$saveinfo = FALSE + # set cores to 1 + ps$values$cores = 1 + super$initialize( id = "surv.cforest", param_set = ps, # can also predict weights, node, but not really useful here - predict_types = c("distr", "crank"), + predict_types = c("crank", "distr"), feature_types = c("integer", "numeric", "factor", "ordered"), properties = c("weights"), packages = c("mlr3extralearners", "partykit", "sandwich", "coin"), diff --git a/R/learner_partykit_surv_ctree.R b/R/learner_partykit_surv_ctree.R index aed661d0a..839a69831 100644 --- a/R/learner_partykit_surv_ctree.R +++ b/R/learner_partykit_surv_ctree.R @@ -6,6 +6,12 @@ #' Survival Partition Tree where a significance test is used to determine the univariate splits. #' Calls [partykit::ctree()] from \CRANpkg{partykit}. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [partykit::predict.party()] function. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. 
#' #' @template learner #' @templateVar id surv.ctree @@ -82,7 +88,7 @@ LearnerSurvCTree = R6Class("LearnerSurvCTree", id = "surv.ctree", packages = c("mlr3extralearners", "partykit", "coin", "sandwich"), feature_types = c("integer", "numeric", "factor", "ordered"), - predict_types = c("distr", "crank"), + predict_types = c("crank", "distr"), param_set = ps, properties = "weights", man = "mlr3extralearners::mlr_learners_surv.ctree", diff --git a/R/learner_penalized_surv_penalized.R b/R/learner_penalized_surv_penalized.R index b9e9b02ce..062666695 100644 --- a/R/learner_penalized_surv_penalized.R +++ b/R/learner_penalized_surv_penalized.R @@ -1,11 +1,23 @@ -#' @title Survival L1 and L2 Penalized Regression Learner +#' @title Survival L1 and L2 Penalized Cox Learner #' @author RaphaelS1 #' @name mlr_learners_surv.penalized #' #' @description -#' Penalized (L1 and L2) generalized linear models. +#' Penalized (L1 and L2) Cox Proportional Hazards model. #' Calls [penalized::penalized()] from \CRANpkg{penalized}. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [penalized::predict()] function. +#' By default the Breslow estimator [penalized::breslow()] is used for computing +#' the baseline hazard. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. +#' +#' @section Initial parameter values: +#' - `trace` is set to `FALSE` to disable printing output during model training. +#' #' @template learner #' @templateVar id surv.penalized #' @@ -13,7 +25,7 @@ #' The `penalized` and `unpenalized` arguments in the learner are implemented slightly #' differently than in [penalized::penalized()]. Here, there is no parameter for `penalized` but #' instead it is assumed that every variable is penalized unless stated in the `unpenalized` -#' parameter, see examples.
+#' parameter. #' #' @references #' `r format_bib("goeman2010l1")` @@ -43,37 +55,55 @@ LearnerSurvPenalized = R6Class("LearnerSurvPenalized", trace = p_lgl(default = TRUE, tags = "train") ) + ps$values = list(trace = FALSE) + super$initialize( id = "surv.penalized", packages = c("mlr3extralearners", "penalized", "pracma"), feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank"), + predict_types = c("crank", "distr"), param_set = ps, man = "mlr3extralearners::mlr_learners_surv.penalized", label = "Penalized Regression" ) + }, + + #' @description + #' Selected features are extracted with the method `coef()` of the S4 model + #' object, see [penalized::penfit()]. + #' By default it returns features with non-zero coefficients. + #' + #' **Note**: Selected features can be retrieved only for datasets with + #' `numeric` features, as the presence of factors with multiple levels makes + #' it difficult to get the original feature names. + #' + #' @return `character()`. + selected_features = function() { + if (is.null(self$model$model)) { + stopf("No model stored") + } + + if (self$model$task_has_factors) { + stopf("Can't return selected features as trained task had factor variables + and original feature names cannot be retrieved") + } + + # Per default, only coefficients of selected variables are returned by coef() + names(penalized::coef(self$model$model)) } ), private = list( .train = function(task) { - - # Checks missing data early to prevent crashing, which is not caught earlier by task/train - - if (any(task$missings() > 0)) { - stop("Missing data is not supported by ", self$id) - } - - # Changes the structure of the penalized and unpenalized parameters to be more user friendly. - # Now the user supplies the column names as a vector and these are added to the formula as - # required. 
pars = self$param_set$get_values(tags = "train") if (length(pars$unpenalized) == 0) { + # if no "unpenalized" features, penalize all (no need to set `pars$unpenalized`) penalized = formulate(rhs = task$feature_names) } else { - if (any(pars$penalized %nin% task$feature_names)) { - stopf("Parameter 'penalized' contains values not present in task") + if (any(pars$unpenalized %nin% task$feature_names)) { + stopf("Parameter 'unpenalized' contains values not present in task") } + # if some "unpenalized" features exist, penalize the rest penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized]) pars$unpenalized = formulate(rhs = pars$unpenalized) } @@ -83,19 +113,23 @@ LearnerSurvPenalized = R6Class("LearnerSurvPenalized", # also there is a bug in withr, which does not clean up Depends, therefore # we need the double with_package # https://github.com/r-lib/withr/issues/261 - with_package("survival", { + model = with_package("survival", { with_package("penalized", { invoke(penalized::penalized, response = task$truth(), penalized = penalized, data = task$data(cols = task$feature_names), model = "cox", .args = pars) }) }) + + list( + model = model, + task_has_factors = any(task$feature_types$type == "factor") + ) }, .predict = function(task) { # Again the penalized and unpenalized covariates are automatically converted to the # correct formula - pars = self$param_set$get_values(tags = "predict") if (length(pars$unpenalized) == 0) { penalized = formulate(rhs = task$feature_names) @@ -105,10 +139,11 @@ LearnerSurvPenalized = R6Class("LearnerSurvPenalized", } surv = with_package("penalized", { - invoke(penalized::predict, self$model, - penalized = penalized, - data = ordered_features(task, self), - .args = pars) + invoke(penalized::predict, + self$model$model, + penalized = penalized, + data = ordered_features(task, self), + .args = pars) }) mlr3proba::.surv_return(times = surv@time, surv = surv@curves) diff --git 
a/R/learner_prioritylasso_classif_priority_lasso.R b/R/learner_prioritylasso_classif_priority_lasso.R index 2374e9bef..74719b999 100644 --- a/R/learner_prioritylasso_classif_priority_lasso.R +++ b/R/learner_prioritylasso_classif_priority_lasso.R @@ -6,6 +6,9 @@ #' Patient outcome prediction based on multi-omics data taking practitioners’ preferences into account. #' Calls [prioritylasso::prioritylasso()] from \CRANpkg{prioritylasso}. #' +#' @section Initial parameter values: +#' - `family` is set to `"binomial"` and cannot be changed +#' #' @templateVar id classif.priority_lasso #' @template learner #' @@ -104,7 +107,7 @@ LearnerClassifPriorityLasso = R6Class("LearnerClassifPriorityLasso", }, #' @description - #' Selected features, i.e. those where the coefficient is positive. + #' Selected features, i.e. those where the coefficient is non-zero. #' @return `character()`. selected_features = function() { if (is.null(self$model)) { diff --git a/R/learner_prioritylasso_regr_priority_lasso.R b/R/learner_prioritylasso_regr_priority_lasso.R index 7a48fbcdd..077a67bb6 100644 --- a/R/learner_prioritylasso_regr_priority_lasso.R +++ b/R/learner_prioritylasso_regr_priority_lasso.R @@ -6,6 +6,9 @@ #' Patient outcome prediction based on multi-omics data taking practitioners’ preferences into account. #' Calls [prioritylasso::prioritylasso()] from \CRANpkg{prioritylasso}. #' +#' @section Initial parameter values: +#' - `family` is set to `"gaussian"` and cannot be changed +#' - `type.measure` set to `"mse"` (cross-validation measure) #' #' @templateVar id regr.priority_lasso #' @template learner diff --git a/R/learner_prioritylasso_surv_priority_lasso.R b/R/learner_prioritylasso_surv_priority_lasso.R index 9e3cec673..6bcae68e6 100644 --- a/R/learner_prioritylasso_surv_priority_lasso.R +++ b/R/learner_prioritylasso_surv_priority_lasso.R @@ -5,6 +5,25 @@ #' @description #' Patient outcome prediction based on multi-omics data taking practitioners’ preferences into account.
#' Calls [prioritylasso::prioritylasso()] from \CRANpkg{prioritylasso}. +#' Many parameters for this survival learner are the same as [mlr_learners_surv.cv_glmnet] +#' as `prioritylasso()` calls [glmnet::cv.glmnet()] during training phase. +#' Note that `prioritylasso()` has ways to deal with block-wise missing data, +#' but this feature is not supported currently. +#' +#' @section Prediction types: +#' This learner returns three prediction types: +#' 1. `lp`: a vector containing the linear predictors (relative risk scores), +#' where each score corresponds to a specific test observation. +#' Calculated using [prioritylasso::predict.prioritylasso()]. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using [mlr3proba::breslow()] where the Breslow estimator is used +#' for computing the baseline hazard. +#' +#' @section Initial parameter values: +#' - `family` is set to `"cox"` for the Cox survival objective and cannot be changed +#' - `type.measure` set to `"deviance"` (cross-validation measure) #' #' @templateVar id surv.priority_lasso #' @template learner @@ -13,7 +32,27 @@ #' `r format_bib("klau2018priolasso")` #' #' @template seealso_learner -#' @template simple_example +#' @examplesIf mlr3misc::require_namespaces(c("mlr3proba"), quietly = TRUE) +#' # Define a Task +#' task = tsk("grace") +#' # Create train and test set +#' ids = partition(task) +#' # check task's features +#' task$feature_names +#' # partition features to 2 blocks +#' blocks = list(bl1 = 1:3, bl2 = 4:6) +#' # define learner +#' learner = lrn("surv.priority_lasso", blocks = blocks, block1.penalization = FALSE, +#' lambda.type = "lambda.1se", standardize = TRUE, nfolds = 5) +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' # selected features +#' learner$selected_features() +#' # Make predictions for the test observations +#' pred = 
learner$predict(task, row_ids = ids$test) +#' pred +#' # Score the predictions +#' pred$score() #' @export LearnerSurvPriorityLasso = R6Class("LearnerSurvPriorityLasso", inherit = mlr3proba::LearnerSurv, @@ -22,21 +61,20 @@ LearnerSurvPriorityLasso = R6Class("LearnerSurvPriorityLasso", #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function() { param_set = ps( - blocks = p_uty(tags = c("train", "required")), - max.coef = p_uty(default = NULL, tags = "train"), - block1.penalization = p_lgl(default = TRUE, tags = "train"), - lambda.type = p_fct(default = "lambda.min", levels = c("lambda.min", "lambda.1se"), tags = c("train", "predict")), #nolint - standardize = p_lgl(default = TRUE, tags = "train"), - nfolds = p_int(default = 5L, lower = 1L, tags = "train"), - foldid = p_uty(default = NULL, tags = "train"), - cvoffset = p_lgl(default = FALSE, tags = "train"), - cvoffsetnfolds = p_int(default = 10, lower = 1L, tags = "train"), - return.x = p_lgl(default = TRUE, tags = "train"), - handle.missingtestdata = p_fct(c("none", "omit.prediction", "set.zero", "impute.block"), tags = "predict"), - include.allintercepts = p_lgl(default = FALSE, tags = "predict"), - use.blocks = p_uty(default = "all", tags = "predict"), + blocks = p_uty(tags = c("train", "required")), + max.coef = p_uty(default = NULL, tags = "train"), + block1.penalization = p_lgl(default = TRUE, tags = "train"), + lambda.type = p_fct(default = "lambda.min", levels = c("lambda.min", "lambda.1se"), tags = "train"), + standardize = p_lgl(default = TRUE, tags = "train"), + nfolds = p_int(default = 5L, lower = 1L, tags = "train"), + foldid = p_uty(default = NULL, tags = "train"), + cvoffset = p_lgl(default = FALSE, tags = "train"), + cvoffsetnfolds = p_int(default = 10, lower = 1L, tags = "train"), + return.x = p_lgl(default = TRUE, tags = "train"), + include.allintercepts = p_lgl(default = FALSE, tags = "predict"), + use.blocks = p_uty(default = "all", tags = "predict"), - # params from 
cv.glmnet + # params from cv.glmnet, passed to `prioritylasso()` during `.train()` alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), alpha = p_dbl(0, 1, default = 1, tags = "train"), big = p_dbl(default = 9.9e35, tags = "train"), @@ -72,16 +110,14 @@ LearnerSurvPriorityLasso = R6Class("LearnerSurvPriorityLasso", type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"), type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"), upper.limits = p_uty(default = Inf, tags = "train"), - predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), #nolint - relax = p_lgl(default = FALSE, tags = "train"), - s = p_dbl(0, 1, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict") #nolint + relax = p_lgl(default = FALSE, tags = "train") ) super$initialize( id = "surv.priority_lasso", packages = "prioritylasso", feature_types = c("logical", "integer", "numeric"), - predict_types = c("response", "lp"), + predict_types = c("crank", "lp", "distr"), param_set = param_set, properties = c("weights", "selected_features"), man = "mlr3extralearners::mlr_learners_surv.priority_lasso", @@ -90,7 +126,7 @@ LearnerSurvPriorityLasso = R6Class("LearnerSurvPriorityLasso", }, #' @description - #' Selected features, i.e. those where the coefficient is positive. + #' Selected features, i.e. those where the coefficient is non-zero. #' @return `character()`. 
selected_features = function() { if (is.null(self$model)) { @@ -113,31 +149,41 @@ LearnerSurvPriorityLasso = R6Class("LearnerSurvPriorityLasso", } data = as.matrix(task$data(cols = task$feature_names)) target = task$truth() - invoke(prioritylasso::prioritylasso, - X = data, Y = target, - .args = pars) + + model = invoke(prioritylasso::prioritylasso, X = data, Y = target, .args = pars) + # add (time, status) of training data for breslow distr prediction + model$train_times = task$times() + model$train_status = task$status() + + model }, - .predict = function(task) { - # get parameters with tag "predict" - pars = self$param_set$get_values(tags = "predict") - pars = rename(pars, "predict.gamma", "gamma") + .predict = function(task) { # get newdata and ensure same ordering in train and predict newdata = as.matrix(ordered_features(task, self)) - # Calculate predictions for the selected predict type. - type = self$predict_type - if (type == "lp") { - type = "link" - } + # get parameters with tag "predict" + pv = self$param_set$get_values(tags = "predict") - pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars) + # get linear predictor for train data + lp_train = as.numeric( + invoke(predict, self$model, type = "link", .args = pv) + ) - if (type == "response") { - list(response = pred, crank = pred) - } else { - list(lp = pred, crank = pred) - } + # get linear predictor for test data + lp_test = as.numeric( + invoke(predict, self$model, newdata = newdata, type = "link", .args = pv) + ) + + # get survival probability matrix using the Breslow estimator for the baseline hazard + surv = mlr3proba::breslow( + times = self$model$train_times, + status = self$model$train_status, + lp_train = lp_train, + lp_test = lp_test + ) + + mlr3proba::.surv_return(surv = surv, lp = lp_test) } ) ) diff --git a/R/learner_randomForestSRC_classif_imbalanced_rfsrc.R b/R/learner_randomForestSRC_classif_imbalanced_rfsrc.R index 5a1ca3c1d..b47b0eb06 100644 --- 
a/R/learner_randomForestSRC_classif_imbalanced_rfsrc.R +++ b/R/learner_randomForestSRC_classif_imbalanced_rfsrc.R @@ -16,7 +16,28 @@ #' `r format_bib("obrien2019imbrfsrc", "chen2004imbrf")` #' #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForestSRC", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("classif.imbalanced_rfsrc", importance = "TRUE") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("sonar") +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() +#' #' @export LearnerClassifImbalancedRandomForestSRC = R6Class("LearnerClassifImbalancedRandomForestSRC", inherit = LearnerClassif, @@ -25,7 +46,7 @@ LearnerClassifImbalancedRandomForestSRC = R6Class("LearnerClassifImbalancedRando #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function() { ps = ps( - ntree = p_int(default = 3000, lower = 1L, tags = "train"), + ntree = p_int(default = 500L, lower = 1L, tags = "train"), method = p_fct( default = "rfq", levels = c("rfq", "brf", "standard"), diff --git a/R/learner_randomForestSRC_classif_rfsrc.R b/R/learner_randomForestSRC_classif_rfsrc.R index 4c5b92089..e7d1463e9 100644 --- a/R/learner_randomForestSRC_classif_rfsrc.R +++ b/R/learner_randomForestSRC_classif_rfsrc.R @@ -10,22 +10,40 @@ #' @templateVar id classif.rfsrc #' #' @section Custom mlr3 parameters: -#' - `mtry`: -#' - This hyperparameter can alternatively be set via the added hyperparameter `mtry.ratio` -#' as `mtry = max(ceiling(mtry.ratio * n_features), 1)`. -#' Note that `mtry` and `mtry.ratio` are mutually exclusive. 
-#' - `sampsize`: -#' - This hyperparameter can alternatively be set via the added hyperparameter `sampsize.ratio` -#' as `sampsize = max(ceiling(sampsize.ratio * n_obs), 1)`. -#' Note that `sampsize` and `sampsize.ratio` are mutually exclusive. -#' - `cores`: -#' This value is set as the option `rf.cores` during training and is set to 1 by default. +#' - `mtry`: This hyperparameter can alternatively be set via the added +#' hyperparameter `mtry.ratio` as `mtry = max(ceiling(mtry.ratio * n_features), 1)`. +#' Note that `mtry` and `mtry.ratio` are mutually exclusive. +#' - `sampsize`: This hyperparameter can alternatively be set via the added +#' hyperparameter `sampsize.ratio` as `sampsize = max(ceiling(sampsize.ratio * n_obs), 1)`. +#' Note that `sampsize` and `sampsize.ratio` are mutually exclusive. +#' - `cores`: This value is set as the option `rf.cores` during training and is +#' set to 1 by default. #' #' @references #' `r format_bib("breiman_2001")` #' #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForestSRC", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("classif.rfsrc", importance = "TRUE") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("sonar") +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() #' @export LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC", inherit = LearnerClassif, @@ -35,7 +53,7 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC", #' Creates a new instance of this [R6][R6::R6Class] class. 
initialize = function() { ps = ps( - ntree = p_int(default = 1000, lower = 1L, tags = "train"), + ntree = p_int(default = 500L, lower = 1L, tags = "train"), mtry = p_int(lower = 1L, tags = "train"), mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), nodesize = p_int(default = 15L, lower = 1L, tags = "train"), diff --git a/R/learner_randomForestSRC_regr_rfsrc.R b/R/learner_randomForestSRC_regr_rfsrc.R index 5727883b8..381223c75 100644 --- a/R/learner_randomForestSRC_regr_rfsrc.R +++ b/R/learner_randomForestSRC_regr_rfsrc.R @@ -15,7 +15,27 @@ #' `r format_bib("breiman_2001")` #' #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForestSRC", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("regr.rfsrc", importance = "TRUE") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("mtcars") +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() #' @export LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC", inherit = LearnerRegr, @@ -25,7 +45,7 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC", #' Creates a new instance of this [R6][R6::R6Class] class. 
initialize = function() { ps = ps( - ntree = p_int(default = 1000, lower = 1L, tags = "train"), + ntree = p_int(default = 500L, lower = 1L, tags = "train"), mtry = p_int(lower = 1L, tags = "train"), mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), nodesize = p_int(default = 15L, lower = 1L, tags = "train"), diff --git a/R/learner_randomForestSRC_surv_rfsrc.R b/R/learner_randomForestSRC_surv_rfsrc.R index b431ad29a..bc52e7c2d 100644 --- a/R/learner_randomForestSRC_surv_rfsrc.R +++ b/R/learner_randomForestSRC_surv_rfsrc.R @@ -6,23 +6,58 @@ #' Random survival forest. #' Calls [randomForestSRC::rfsrc()] from \CRANpkg{randomForestSRC}. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and (unique event) time points in columns. +#' Calculated using the internal [randomForestSRC::predict.rfsrc()] function. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. +#' #' @template learner #' @templateVar id surv.rfsrc #' #' @inheritSection mlr_learners_classif.rfsrc Custom mlr3 parameters #' -#' @details -#' [randomForestSRC::predict.rfsrc()] returns both cumulative hazard function (chf) and -#' survival function (surv) but uses different estimators to derive these. `chf` uses a -#' bootstrapped Nelson-Aalen estimator, (Ishwaran, 2008) whereas `surv` uses a bootstrapped -#' Kaplan-Meier estimator. The choice of which estimator to use is given by the extra -#' `estimator` hyper-parameter, default is `nelson`. +#' @section Custom mlr3 parameters: +#' - `estimator`: Hidden parameter that controls the type of estimator used to +#' derive the survival function during prediction. 
The **default** value is `"nelson"` which +#' uses a bootstrapped Nelson-Aalen estimator for the cumulative hazard function +#' \eqn{H(t)}, (Ishwaran, 2008) from which we calculate \eqn{S(t) = \exp(-H(t))}, +#' whereas `"kaplan"` uses a bootstrapped Kaplan-Meier estimator to directly estimate +#' \eqn{S(t)}. +#' +#' @section Initial parameter values: +#' - `ntime`: Number of time points to coerce the observed event times for use in the +#' estimated survival function during prediction. We changed the default value +#' of `150` to `0` in order to be in line with other random survival forest +#' learners and use all the **unique event times from the train set**. #' #' @references #' `r format_bib("ishwaran_2008", "breiman_2001")` #' #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForestSRC", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("surv.rfsrc", importance = "TRUE") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("grace") +#' +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() #' @export LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", inherit = mlr3proba::LearnerSurv, @@ -32,7 +67,7 @@ LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", #' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() { ps = ps( - ntree = p_int(default = 1000, lower = 1L, tags = "train"), + ntree = p_int(default = 500L, lower = 1L, tags = "train"), mtry = p_int(lower = 1L, tags = "train"), mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), nodesize = p_int(default = 15L, lower = 1L, tags = "train"), @@ -59,8 +94,8 @@ LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", na.action = p_fct( default = "na.omit", levels = c("na.omit", "na.impute"), tags = c("train", "predict")), - nimpute = p_int(default = 1L, lower = 1L, tags = "train"), - ntime = p_int(lower = 1L, tags = "train"), + nimpute = p_int(lower = 1L, default = 1L, special_vals = list(NULL), tags = "train"), + ntime = p_int(lower = 0L, default = 150L, special_vals = list(NULL), tags = "train"), cause = p_int(lower = 1L, tags = "train"), proximity = p_fct( default = "FALSE", @@ -99,6 +134,8 @@ LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", case.depth = p_lgl(default = FALSE, tags = c("train", "predict")) ) + ps$values$ntime = 0 + super$initialize( id = "surv.rfsrc", packages = c("mlr3extralearners", "randomForestSRC", "pracma"), @@ -171,13 +208,11 @@ LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", pars_predict$cores = NULL p = invoke(predict, object = self$model, newdata = newdata, .args = pars_predict, - .opts = list(rf.cores = cores)) + .opts = list(rf.cores = cores)) # rfsrc uses Nelson-Aalen in chf and Kaplan-Meier for survival, as these # don't give equivalent results one must be chosen and the relevant functions are transformed # as required. 
- - surv = if (estimator == "nelson") exp(-p$chf) else p$survival mlr3proba::.surv_return(times = self$model$time.interest, surv = surv) diff --git a/R/learner_randomForest_classif_randomForest.R b/R/learner_randomForest_classif_randomForest.R index 412575770..8bb7914e7 100644 --- a/R/learner_randomForest_classif_randomForest.R +++ b/R/learner_randomForest_classif_randomForest.R @@ -12,9 +12,29 @@ #' @references #' `r format_bib("breiman_2001")` #' -#' @export #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForest", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("classif.randomForest", importance = "accuracy") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("sonar") +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() +#' @export LearnerClassifRandomForest = R6Class("LearnerClassifRandomForest", inherit = LearnerClassif, diff --git a/R/learner_randomForest_regr_randomForest.R b/R/learner_randomForest_regr_randomForest.R index d6fa2455e..7674217be 100644 --- a/R/learner_randomForest_regr_randomForest.R +++ b/R/learner_randomForest_regr_randomForest.R @@ -12,9 +12,29 @@ #' @references #' `r format_bib("breiman_2001")` #' -#' @export #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("randomForest", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("regr.randomForest", importance = "mse") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("mtcars") +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' 
print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() +#' @export LearnerRegrRandomForest = R6Class("LearnerRegrRandomForest", inherit = LearnerRegr, diff --git a/R/learner_ranger_surv_ranger.R b/R/learner_ranger_surv_ranger.R index 7827caa1b..3e0a2afbe 100644 --- a/R/learner_ranger_surv_ranger.R +++ b/R/learner_ranger_surv_ranger.R @@ -6,11 +6,17 @@ #' Random survival forest. #' Calls [ranger::ranger()] from package \CRANpkg{ranger}. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and (unique event) time points in columns. +#' Calculated using the internal [ranger::predict.ranger()] function. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. +#' #' @section Custom mlr3 parameters: -#' - `mtry`: -#' - This hyperparameter can alternatively be set via our hyperparameter `mtry.ratio` -#' as `mtry = max(ceiling(mtry.ratio * n_features), 1)`. -#' Note that `mtry` and `mtry.ratio` are mutually exclusive. +#' - `mtry`: This hyperparameter can alternatively be set via our hyperparameter +#' `mtry.ratio` as `mtry = max(ceiling(mtry.ratio * n_features), 1)`. +#' Note that `mtry` and `mtry.ratio` are mutually exclusive. #' #' @section Initial parameter values: #' - `num.threads` is initialized to 1 to avoid conflicts with parallelization via \CRANpkg{future}. 
@@ -21,9 +27,30 @@ #' @references #' `r format_bib("wright_2017", "breiman_2001")` #' -#' @export #' @template seealso_learner -#' @template example +#' @examplesIf requireNamespace("ranger", quietly = TRUE) +#' # Define the Learner +#' learner = mlr3::lrn("surv.ranger", importance = "permutation") +#' print(learner) +#' +#' # Define a Task +#' task = mlr3::tsk("grace") +#' +#' # Create train and test set +#' ids = mlr3::partition(task) +#' +#' # Train the learner on the training ids +#' learner$train(task, row_ids = ids$train) +#' +#' print(learner$model) +#' print(learner$importance()) +#' +#' # Make predictions for the test rows +#' predictions = learner$predict(task, row_ids = ids$test) +#' +#' # Score the predictions +#' predictions$score() +#' @export LearnerSurvRanger = R6Class("LearnerSurvRanger", inherit = mlr3proba::LearnerSurv, public = list( @@ -67,7 +94,7 @@ LearnerSurvRanger = R6Class("LearnerSurvRanger", super$initialize( id = "surv.ranger", param_set = ps, - predict_types = c("distr", "crank"), + predict_types = c("crank", "distr"), feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), properties = c("weights", "importance", "oob_error"), packages = c("mlr3extralearners", "ranger"), diff --git a/R/learner_survival_surv_nelson.R b/R/learner_survival_surv_nelson.R index b433b94ec..82b47bc3d 100644 --- a/R/learner_survival_surv_nelson.R +++ b/R/learner_survival_surv_nelson.R @@ -4,7 +4,17 @@ #' #' @description #' Non-parametric estimator of the cumulative hazard rate function. -#' Calls [survival::survfit()] from \CRANpkg{survival}. +#' Calls [survival::survfit()] from \CRANpkg{survival}. +#' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. 
+#' The cumulative hazard \eqn{H(t)} is calculated using the train data and the +#' default parameters of the [survival::survfit()] function, i.e. `ctype = 1`, +#' which uses the Nelson-Aalen formula. +#' Then for each test observation the survival curve is \eqn{S(t) = \exp{(-H(t))}}. +#' 2. `crank`: the expected mortality using [mlr3proba::.surv_return()]. #' #' @template learner #' @templateVar id surv.nelson diff --git a/R/learner_survivalmodels_surv_akritas.R b/R/learner_survivalmodels_surv_akritas.R index 20b391ed8..846ae7873 100644 --- a/R/learner_survivalmodels_surv_akritas.R +++ b/R/learner_survivalmodels_surv_akritas.R @@ -6,6 +6,17 @@ #' Survival akritas estimator. #' Calls [survivalmodels::akritas()] from package 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.akritas()] function. +#' The survival matrix uses the unique time points from the training set. +#' We advise setting the parameter `ntime`, which adjusts the granularity +#' of these time points to a reasonable number (e.g. `150`). +#' This avoids large execution times during prediction. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. 
+#' #' @template learner #' @templateVar id surv.akritas #' @@ -27,7 +38,7 @@ LearnerSurvAkritas = R6Class("LearnerSurvAkritas", ps = ps( lambda = p_dbl(default = 0.5, lower = 0, upper = 1, tags = "predict"), reverse = p_lgl(default = FALSE, tags = "train"), - ntime = p_dbl(default = 150, lower = 1, tags = "predict"), + ntime = p_int(lower = 1, default = NULL, special_vals = list(NULL), tags = "predict"), round_time = p_int(default = 2, lower = 0, special_vals = list(FALSE), tags = "predict") ) @@ -45,27 +56,33 @@ LearnerSurvAkritas = R6Class("LearnerSurvAkritas", private = list( .train = function(task) { - pars = self$param_set$get_values(tags = "train") + pv = self$param_set$get_values(tags = "train") invoke( survivalmodels::akritas, data = data.table::setDF(task$data()), time_variable = task$target_names[1L], status_variable = task$target_names[2L], - .args = pars + .args = pv ) }, .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") + pv = self$param_set$get_values(tags = "predict") newdata = ordered_features(task, self) + # use train set times + times = self$model$y[, "time"] + # coerce times points to an `ntime` grid + times = gridify_times(times, pv$ntime) + pred = invoke( predict, self$model, newdata = newdata, + times = times, distr6 = FALSE, type = "all", - .args = pars + .args = pv ) list(crank = pred$risk, distr = pred$surv) diff --git a/R/learner_survivalmodels_surv_coxtime.R b/R/learner_survivalmodels_surv_coxtime.R index 069da6a67..5fb7a0fad 100644 --- a/R/learner_survivalmodels_surv_coxtime.R +++ b/R/learner_survivalmodels_surv_coxtime.R @@ -6,6 +6,13 @@ #' Cox-Time survival model. #' Calls [survivalmodels::coxtime()] from package 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. 
+#' Calculated using the internal [survivalmodels::predict.pycox()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. +#' #' @template learner #' @templateVar id surv.coxtime #' diff --git a/R/learner_survivalmodels_surv_deephit.R b/R/learner_survivalmodels_surv_deephit.R index 61d3c8df9..e03efabfa 100644 --- a/R/learner_survivalmodels_surv_deephit.R +++ b/R/learner_survivalmodels_surv_deephit.R @@ -6,6 +6,13 @@ #' Neural network 'Deephit' for survival analysis. #' Calls [survivalmodels::deephit()] from pacakge 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.pycox()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. +#' #' @template learner #' @templateVar id surv.deephit #' diff --git a/R/learner_survivalmodels_surv_deepsurv.R b/R/learner_survivalmodels_surv_deepsurv.R index cbabdccf0..f4e18de97 100644 --- a/R/learner_survivalmodels_surv_deepsurv.R +++ b/R/learner_survivalmodels_surv_deepsurv.R @@ -4,7 +4,14 @@ #' #' @description #' DeepSurv fits a neural network based on the partial likelihood from a Cox PH. -#' Calls [survivalmodels::dnnsurv()] from package 'survivalmodels'. +#' Calls [survivalmodels::deepsurv()] from package 'survivalmodels'. +#' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.pycox()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. 
#' #' @template learner #' @templateVar id surv.deepsurv diff --git a/R/learner_survivalmodels_surv_dnnsurv.R b/R/learner_survivalmodels_surv_dnnsurv.R index 289b70644..89be01e08 100644 --- a/R/learner_survivalmodels_surv_dnnsurv.R +++ b/R/learner_survivalmodels_surv_dnnsurv.R @@ -6,6 +6,13 @@ #' Fits a neural network based on pseudo-conditional survival probabilities. #' Calls [survivalmodels::dnnsurv()] from package 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.dnnsurv()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. +#' #' @template learner #' @templateVar id surv.dnnsurv #' diff --git a/R/learner_survivalmodels_surv_loghaz.R b/R/learner_survivalmodels_surv_loghaz.R index d16fb5596..6f89e7f20 100644 --- a/R/learner_survivalmodels_surv_loghaz.R +++ b/R/learner_survivalmodels_surv_loghaz.R @@ -6,6 +6,13 @@ #' Survival logistic hazard learner. #' Calls [survivalmodels::loghaz()] from package 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.pycox()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. 
+#' #' @templateVar id surv.loghaz #' @template learner #' diff --git a/R/learner_survival_surv_parametric.R b/R/learner_survivalmodels_surv_parametric.R similarity index 75% rename from R/learner_survival_surv_parametric.R rename to R/learner_survivalmodels_surv_parametric.R index 5c2d3fad9..7ed85adfb 100644 --- a/R/learner_survival_surv_parametric.R +++ b/R/learner_survivalmodels_surv_parametric.R @@ -4,38 +4,43 @@ #' #' @description #' Parametric survival model. -#' Calls `parametric()]` from 'survivalmodels'. +#' Calls [survivalmodels::parametric()] from package 'survivalmodels'. +#' +#' @section Prediction types: +#' This learner returns three prediction types: +#' 1. `lp`: a vector of linear predictors (relative risk scores), one per test +#' observation. +#' `lp` is predicted using the formula \eqn{lp = X\beta} where \eqn{X} are the +#' variables in the test data set and \eqn{\beta} are the fitted coefficients. +#' 2. `crank`: same as `lp`. +#' 3. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' The distribution `distr` is composed using the `lp` predictions and specifying +#' a model form in the `form` hyper-parameter, see Details. +#' The survival matrix uses the unique time points from the training set. +#' The parameter `ntime` allows to adjust the granularity of these time points +#' to any number (e.g. `150`). #' #' @section Custom mlr3 parameters: #' - `discrete` determines the class of the returned survival probability -#' distribution. If `FALSE` (default) continuous probability -#' distributions are returned using [distr6::VectorDistribution], otherwise -#' [distr6::Matdist] (faster to calculate survival measures that require a -#' `distr` prediction type). +#' distribution. 
If `FALSE`, vectorized continuous probability distributions are +#' returned using [distr6::VectorDistribution], otherwise a [distr6::Matdist] +#' object, which is faster for calculating survival measures that require a `distr` +#' prediction type. Default option is `TRUE`. #' #' @template learner #' @templateVar id surv.parametric #' @template install_survivalmodels #' #' @details -#' This learner allows you to choose a distribution and a model form to compose -#' a predicted survival probability distribution. +#' This learner allows you to choose a **distribution** and a **model form** to compose +#' a predicted survival probability distribution \eqn{S(t)}. #' #' The predict method is implemented in [survivalmodels::predict.parametric()]. #' Our implementation is more efficient for composition to distributions than #' [survival::predict.survreg()]. #' -#' Three types of prediction are returned for this learner: -#' 1. `lp`: a vector of linear predictors (relative risk scores), one per test -#' observation. -#' `lp` is predicted using the formula \eqn{lp = X\beta} where \eqn{X} are the -#' variables in the test data set and \eqn{\beta} are the fitted coefficients. -#' 2. `crank`: same as `lp`. -#' 3. `distr`: a survival matrix in two dimensions, where observations are -#' represented in rows and time points in columns. -#' The distribution `distr` is composed using the `lp` predictions and specifying -#' a model form in the `form` hyper-parameter. 
These are as follows, with respective -#' survival functions: +#' The available model forms with their respective survival functions, are as follows: #' #' - Accelerated Failure Time (`aft`) \deqn{S(t) = S_0(\frac{t}{exp(lp)})}{S(t) = S0(t/exp(lp))} #' - Proportional Hazards (`ph`) \deqn{S(t) = S_0(t)^{exp(lp)}}{S(t) = S0(t)^exp(lp)} @@ -45,7 +50,7 @@ #' #' where \eqn{S_0}{S0} is the estimated baseline survival distribution (in #' this case with a given parametric form), \eqn{lp} is the predicted linear -#' predictor, \eqn{\Phi} is the cdf of a N(0, 1) distribution, and \eqn{s} is +#' predictor, \eqn{\Phi} is the cdf of a \eqn{N(0, 1)} distribution, and \eqn{s} is #' the fitted scale parameter. #' #' Whilst any combination of distribution and model form is possible, this does @@ -91,15 +96,17 @@ LearnerSurvParametric = R6Class("LearnerSurvParametric", robust = p_lgl(default = FALSE, tags = "train"), score = p_lgl(default = FALSE, tags = "train"), cluster = p_uty(tags = "train"), - discrete = p_lgl(tags = c("required", "predict")) + discrete = p_lgl(tags = c("required", "predict")), + ntime = p_int(lower = 1, default = NULL, special_vals = list(NULL), tags = "predict"), + round_time = p_int(default = 2, lower = 0, special_vals = list(FALSE), tags = "predict") ) - ps$values = list(discrete = FALSE, dist = "weibull", form = "aft") + ps$values = list(discrete = TRUE, dist = "weibull", form = "aft") super$initialize( id = "surv.parametric", param_set = ps, - predict_types = c("crank", "distr", "lp"), + predict_types = c("crank", "lp", "distr"), feature_types = c("logical", "integer", "numeric", "factor"), properties = "weights", packages = c("mlr3extralearners", "survival", "pracma"), @@ -128,13 +135,18 @@ LearnerSurvParametric = R6Class("LearnerSurvParametric", .predict = function(task) { pv = self$param_set$get_values(tags = "predict") - newdata = as.data.frame(ordered_features(task, self)) + # use train set times (`y` is a `Surv` object) + times = 
as.numeric(self$model$model$y[, 1L]) + # coerce time points to an `ntime` grid + times = gridify_times(times, pv$ntime) + pred = invoke( predict, self$model, newdata = newdata, + times = times, distr6 = !pv$discrete, type = "all", .args = pv diff --git a/R/learner_survivalmodels_surv_pchazard.R b/R/learner_survivalmodels_surv_pchazard.R index 64594f121..677dfdecd 100644 --- a/R/learner_survivalmodels_surv_pchazard.R +++ b/R/learner_survivalmodels_surv_pchazard.R @@ -7,6 +7,13 @@ #' predictions of a discrete hazard function, also known as Nnet-Survival. #' Calls [survivalmodels::pchazard()] from package 'survivalmodels'. #' +#' @section Prediction types: +#' This learner returns two prediction types: +#' 1. `distr`: a survival matrix in two dimensions, where observations are +#' represented in rows and time points in columns. +#' Calculated using the internal [survivalmodels::predict.pycox()] function. +#' 2. `crank`: the expected mortality using [survivalmodels::surv_to_risk()]. +#' #' @template learner #' @templateVar id surv.pchazard #' diff --git a/R/learner_survivalsvm_surv_svm.R b/R/learner_survivalsvm_surv_svm.R index e4c4dcf60..0bb289e82 100644 --- a/R/learner_survivalsvm_surv_svm.R +++ b/R/learner_survivalsvm_surv_svm.R @@ -9,6 +9,13 @@ #' @template learner #' @templateVar id surv.svm #' +#' @section Prediction types: +#' This learner returns up to two prediction types: +#' 1. `crank`: a vector containing the continuous ranking scores, where each score +#' corresponds to a specific test observation. +#' 2. `response`: the survival time of each test observation, equal to `-crank`. +#' This prediction type is only available when `type` is `"regression"` or `"hybrid"`. +#' #' @details #' Four possible SVMs can be implemented, dependent on the `type` parameter. These correspond #' to predicting the survival time via regression (`regression`), predicting a continuous rank @@ -18,6 +25,11 @@ #' #' `makediff3` is recommended when using `type = "hybrid"`. 
#' +#' @section Custom mlr3 parameters: +#' +#' - `gamma`, `mu` have replaced `gamma.mu` so that it's easier to tune these separately. +#' `mu` is only used when `type = "hybrid"`. +#' #' @references #' `r format_bib("van2011improved", "van2011support", "shivaswamy2007support")` #' @@ -25,7 +37,7 @@ #' @examplesIf mlr3misc::require_namespaces(c("mlr3extralearners", "survivalsvm"), quietly = TRUE) #' set.seed(123) #' # Define the Learner and set parameter values -#' learner = lrn("surv.svm", gamma.mu = 0.1) +#' learner = lrn("surv.svm", gamma = 0.1) #' print(learner) #' #' # Define a Task @@ -61,7 +73,8 @@ LearnerSurvSVM = R6Class("LearnerSurvSVM", diff.meth = p_fct( levels = c("makediff1", "makediff2", "makediff3"), tags = c("train")), - gamma.mu = p_uty(tags = c("train", "required")), + gamma = p_dbl(default = NULL, special_vals = list(NULL), tags = "train"), + mu = p_dbl(default = NULL, special_vals = list(NULL), tags = "train"), opt.meth = p_fct( default = "quadprog", levels = c("quadprog", "ipop"), tags = "train"), @@ -89,37 +102,55 @@ LearnerSurvSVM = R6Class("LearnerSurvSVM", predict_types = c("crank", "response"), param_set = ps, man = "mlr3extralearners::mlr_learners_surv.svm", - label = "Support Vector Machine" + label = "Survival Support Vector Machine" ) } ), private = list( .train = function(task) { + pars = self$param_set$get_values(tags = "train") + + # Regularization parameters are defined separately to be tuned more easily + gamma = pars$gamma + mu = pars$mu + type = pars$type + if (!is.null(type) && type == "hybrid") { + # a vector with two parameters is required when `type` = "hybrid" + gamma.mu = c(gamma, mu) + } else { + gamma.mu = gamma + } + # remove `gamma` and `mu` + pars$gamma = NULL + pars$mu = NULL + with_package("survivalsvm", { invoke(survivalsvm::survivalsvm, formula = task$formula(), data = task$data(), - .args = self$param_set$get_values(tags = "train")) + gamma.mu = gamma.mu, # pass `gamma.mu` separately + .args = pars) }) }, 
.predict = function(task) { pars = self$param_set$get_values(tags = "predict") - fit = predict(self$model, newdata = ordered_features(task, self), - .args = pars - ) + fit = invoke(predict, + self$model, + newdata = ordered_features(task, self), + .args = pars) crank = as.numeric(fit$predicted) - if (is.null(self$param_set$values$type) || - (self$param_set$values$type %in% c("regression", "hybrid"))) { - # higher survival time = lower risk + type = self$param_set$values$type + if (is.null(type) || (type %in% c("regression", "hybrid"))) { + # ranking is like survival time response = crank } else { response = NULL } - # higher rank = higher risk + # higher continuous ranking = lower survival time list(crank = -crank, response = response) } ) diff --git a/R/learner_xgboost_surv_xgboost_aft.R b/R/learner_xgboost_surv_xgboost_aft.R index da65b1c6b..06eb4b569 100644 --- a/R/learner_xgboost_surv_xgboost_aft.R +++ b/R/learner_xgboost_surv_xgboost_aft.R @@ -8,7 +8,7 @@ #' Calls [xgboost::xgb.train()] from package \CRANpkg{xgboost} with `objective` #' set to `survival:aft` and `eval_metric` to `aft-nloglik`. #' -#' @details +#' @section Prediction types: #' This learner returns three prediction types: #' 1. `response`: the estimated survival time \eqn{T} for each test observation. #' 2. `lp`: a vector of linear predictors (relative risk scores), one per @@ -26,7 +26,6 @@ #' @template section_early_stopping #' @templateVar id surv.xgboost.aft #' @template learner -#' @template section_early_stopping #' #' @references #' `r format_bib("chen_2016", "barnwal2022")` diff --git a/R/learner_xgboost_surv_xgboost_cox.R b/R/learner_xgboost_surv_xgboost_cox.R index 2060b1aac..be6b4f935 100644 --- a/R/learner_xgboost_surv_xgboost_cox.R +++ b/R/learner_xgboost_surv_xgboost_cox.R @@ -8,7 +8,7 @@ #' Calls [xgboost::xgb.train()] from package \CRANpkg{xgboost} with `objective` #' set to `survival:cox` and `eval_metric` to `cox-nloglik`. 
#' -#' @details +#' @section Prediction types: #' Three types of prediction are returned for this learner: #' 1. `lp`: a vector of linear predictors (relative risk scores), one per #' observation. diff --git a/man-roxygen/example.R b/man-roxygen/example.R index c7869189c..7ab50476b 100644 --- a/man-roxygen/example.R +++ b/man-roxygen/example.R @@ -2,13 +2,13 @@ pkgs = setdiff(mlr3::lrn(id)$packages, c("mlr3", "mlr3learners")) l = lrn(id) task_id = if ("LearnerClassif" %in% class(l)) { - "sonar" +"sonar" } else if ("LearnerRegr" %in% class(l)) { - "mtcars" +"mtcars" } else if ("LearnerSurv" %in% class(l)) { - "grace" +"grace" } else if ("LearnerDens" %in% class(l)) { - "faithful" +"faithful" } %> #' @@ -27,7 +27,7 @@ task_id = if ("LearnerClassif" %in% class(l)) { #' learner$train(task, row_ids = ids$train) #' #' print(learner$model) -#' <%= if("importance" %in% l$properties) "print(learner$importance)" %> +#' <%= if("importance" %in% l$properties) "print(learner$importance())" %> #' #' # Make predictions for the test rows #' predictions = learner$predict(task, row_ids = ids$test) diff --git a/man/mlr_learners_classif.AdaBoostM1.Rd b/man/mlr_learners_classif.AdaBoostM1.Rd index 1fc0f3b11..34ebbbfc2 100644 --- a/man/mlr_learners_classif.AdaBoostM1.Rd +++ b/man/mlr_learners_classif.AdaBoostM1.Rd @@ -6,7 +6,7 @@ \title{Classification AdaBoostM1 Learner} \description{ Adaptive boosting algorithm for classification. -Calls \code{\link[RWeka:AdaBoostM1]{RWeka::AdaBoostM1()}} from \CRANpkg{RWeka}. +Calls \code{\link[RWeka:Weka_classifier_meta]{RWeka::AdaBoostM1()}} from \CRANpkg{RWeka}. } \section{Dictionary}{ @@ -127,6 +127,7 @@ henrifnk
Inherited methods