diff --git a/.Rbuildignore b/.Rbuildignore index 8c1f77aa..6eebfb9c7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ ^\.github$ ^vignettes/articles-online-only$ ^release-prep\.R$ +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index db3f1c32..9f0bb3a7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ inst/doc dev-helpers.R release-prep.R +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index 3d55cb48..7ce3e65e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ URL: https://mc-stan.org/cmdstanr/, https://discourse.mc-stan.org BugReports: https://github.com/stan-dev/cmdstanr/issues Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE, r6 = FALSE) SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan) Depends: diff --git a/NAMESPACE b/NAMESPACE index c8a6217d..88f5380f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(read_cmdstan_csv) export(read_sample_csv) export(rebuild_cmdstan) export(register_knitr_engine) +export(remaining_columns_to_read) export(set_cmdstan_path) export(set_num_threads) export(write_stan_file) diff --git a/R/args.R b/R/args.R index 7b4a8686..436b8df1 100644 --- a/R/args.R +++ b/R/args.R @@ -1,5 +1,24 @@ # CmdStanArgs ------------------------------------------------------------- +#' Returns the number of inner processes for a given method +#' @param args An args object +#' @noRd +get_num_inner_processes <- function(args) { + x <- NULL + if (inherits(args, "SampleArgs")) { + x <- args$chains + } else if (inherits(args, "PathfinderArgs")) { + x <- args$num_paths + } else if (inherits(args, "GenerateQuantitiesArgs")) { + x <- length(args$fitted_params) + } + if (!is.null(x)) { + return(x) + } else { + return(1) + } +} + #' Internal objects for storing CmdStan arguments #' #' These objects store arguments for creating the call to CmdStan and provide a @@ -43,7 +62,7 @@ CmdStanArgs <- R6::R6Class( sig_figs = NULL, opencl_ids = NULL, model_variables = NULL, - num_threads = NULL) { + threads = NULL) { self$model_name <- model_name self$stan_code <- stan_code @@ -75,17 +94,18 @@ CmdStanArgs <- R6::R6Class( self$output_dir <- output_dir %||% tempdir(check = TRUE) } } + num_inner_processes <- self$num_inner_processes() self$output_dir <- repair_path(self$output_dir) self$output_basename <- output_basename - if (is.function(init)) { - init <- process_init_function(init, length(self$proc_ids), model_variables) - } else if (is.list(init) && !is.data.frame(init)) { - init <- process_init_list(init, length(self$proc_ids), model_variables) - } + init <- process_init(init, num_inner_processes, model_variables) self$init <- init self$opencl_ids <- opencl_ids - self$num_threads = NULL - self$method_args$validate(num_procs = length(self$proc_ids)) + if (is.null(threads)) { + self$num_threads = 1 + } else { + self$num_threads = threads + } + self$method_args$validate(num_procs = num_inner_processes) self$validate() }, validate = function() { @@ -98,9 +118,12 @@ CmdStanArgs <- R6::R6Class( self$init <- absolute_path(self$init) } self$init <- maybe_recycle_init(self$init, length(self$proc_ids)) - self$seed <- maybe_generate_seed(self$seed, length(self$proc_ids)) + self$seed <- maybe_generate_seed(self$seed, self$num_inner_processes()) invisible(self) }, + num_inner_processes = function() { + return(get_num_inner_processes(self$method_args)) + }, new_file_names = function(type = c("output", "diagnostic", "profile")) { basename <- self$model_name @@ -113,17 +136,35 @@ CmdStanArgs <- R6::R6Class( if (type == "output" && !is.null(self$output_basename)) { basename <- self$output_basename } - generate_file_names( + if (type %in% c("output", "diagnostic")) { + id_vals = seq_len(self$num_inner_processes()) + } else { + id_vals = NULL + } + new_base_name = generate_file_names( basename = basename, - ext = ".csv", - ids = self$proc_ids, + ext = "", + ids = NULL, timestamp = is.null(self$output_basename), random = is.null(self$output_basename) ) + # Strip off "." at end of file name + new_base_name = strtrim(new_base_name, nchar(new_base_name) - 1) + if (is.null(id_vals)) { + sep = "" + } else { + sep = "_" + } + new_full_names = paste0(new_base_name, sep, id_vals, ".csv") + new_base_name = paste0(new_base_name, ".csv") + return(list(new_base_name, new_full_names)) + }, new_files = function(type = c("output", "diagnostic", "profile")) { - files <- file.path(self$output_dir, self$new_file_names(type)) - files + file_paths = self$new_file_names(type) + file_paths[[1]] <- file.path(self$output_dir, file_paths[[1]]) + file_paths[[2]] <- file.path(self$output_dir, file_paths[[2]]) + return(file_paths) }, #' Compose all arguments to pass to CmdStan @@ -159,11 +200,13 @@ CmdStanArgs <- R6::R6Class( if (!is.null(self$init)) { args$init <- paste0("init=", wsl_safe_path(self$init[idx])) } + if (!is.null(self$num_threads)) { + args$num_threads <- c(paste0("num_threads=", self$num_threads)) + } if (!is.null(self$data_file)) { args$data <- c("data", paste0("file=", wsl_safe_path(self$data_file))) } - args$output <- c("output", paste0("file=", wsl_safe_path(output_file))) if (!is.null(latent_dynamics_file)) { args$output <- c(args$output, paste0("diagnostic_file=", wsl_safe_path(latent_dynamics_file))) @@ -182,9 +225,6 @@ CmdStanArgs <- R6::R6Class( if (!is.null(self$opencl_ids)) { args$opencl <- c("opencl", paste0("platform=", self$opencl_ids[1]), paste0("device=", self$opencl_ids[2])) } - if (!is.null(self$num_threads)) { - num_threads <- c(args$output, paste0("num_threads=", self$num_threads)) - } args <- do.call(c, append(args, list(use.names = FALSE))) self$method_args$compose(idx, args) }, @@ -217,8 +257,12 @@ SampleArgs <- R6::R6Class( term_buffer = NULL, window = NULL, fixed_param = FALSE, - diagnostics = NULL) { - + diagnostics = NULL, + chains = NULL) { + if (is.null(chains)) { + chains <- 1 + } + self$chains <- chains self$iter_warmup <- iter_warmup self$iter_sampling <- iter_sampling self$save_warmup <- save_warmup @@ -334,7 +378,8 @@ SampleArgs <- R6::R6Class( .make_arg("adapt_engaged"), .make_arg("init_buffer"), .make_arg("term_buffer"), - .make_arg("window") + .make_arg("window"), + .make_arg("chains", cmdstan_arg_name="num_chains") ) } new_args <- do.call(c, new_args) @@ -690,8 +735,15 @@ validate_cmdstan_args <- function(self) { if (!is.null(self$data_file)) { assert_file_exists(self$data_file, access = "r") } - num_procs <- length(self$proc_ids) - validate_init(self$init, num_procs) + + num_inner_procs <- self$num_inner_processes() + # GQ is the only method that still dispatches multiple processes + if (inherits(self$method_args, "GenerateQuantitiesArgs")) { + num_procs <- length(self$method_args$fixed_params) + } else { + num_procs <- 1 + } + validate_init(self$init, num_inner_procs) validate_seed(self$seed, num_procs) if (!is.null(self$opencl_ids)) { if (cmdstan_version() < "2.26") { @@ -1018,17 +1070,62 @@ validate_exe_file <- function(exe_file) { invisible(TRUE) } +#' Generic for processing inits +#' @noRd +process_init <- function(...) { + UseMethod("process_init") +} + +#' Default method +#' @noRd +process_init.default <- function(x, ...) { + return(x) +} + +#' Write initial values to files if provided as posterior `draws` object +#' @noRd +#' @param init A type that inherits the `posterior::draws` class. +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.draws <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + if (!is.null(model_variables)) { + variable_names = names(model_variables$parameters) + } else { + variable_names = colnames(draws)[!grepl("__", colnames(draws))] + } + draws <- posterior::subset_draws(init, variable = variable_names) + draws <- posterior::resample_draws(draws, ndraws = num_procs, + method ="simple_no_replace") + draws_rvar = posterior::as_draws_rvars(draws) + inits = lapply(1:num_procs, \(draw_iter) { + init_i = lapply(variable_names, \(var_name) { + x = drop(posterior::draws_of(drop( + posterior::subset_draws(draws_rvar[[var_name]], draw=draw_iter)))) + return(x) + }) + names(init_i) = variable_names + return(init_i) + }) + return(process_init(inits, num_procs, model_variables, warn_partial)) +} + #' Write initial values to files if provided as list of lists #' @noRd #' @param init List of init lists. -#' @param num_procs Number of CmdStan processes. +#' @param num_procs Number of inits needed. #' @param model_variables A list of all parameters with their types and #' number of dimensions. Typically the output of model$variables(). #' @param warn_partial Should a warning be thrown if inits are only specified #' for a subset of parameters? Can be controlled by global option #' `cmdstanr_warn_inits`. #' @return A character vector of file paths. -process_init_list <- function(init, num_procs, model_variables = NULL, +process_init.list <- function(init, num_procs, model_variables = NULL, warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { if (!all(sapply(init, function(x) is.list(x) && !is.data.frame(x)))) { stop("If 'init' is a list it must be a list of lists.", call. = FALSE) @@ -1083,10 +1180,11 @@ process_init_list <- function(init, num_procs, model_variables = NULL, } init_paths <- tempfile( - pattern = paste0("init-", seq_along(init), "-"), + pattern = "init-", tmpdir = cmdstan_tempdir(), - fileext = ".json" + fileext = "" ) + init_paths <- paste0(init_paths, "_", seq_along(init), ".json") for (i in seq_along(init)) { write_stan_json(init[[i]], init_paths[i]) } @@ -1096,11 +1194,12 @@ process_init_list <- function(init, num_procs, model_variables = NULL, #' Write initial values to files if provided as function #' @noRd #' @param init Function generating a single list of initial values. -#' @param num_procs Number of CmdStan processes. +#' @param num_procs Number of inits needed. #' @param model_variables A list of all parameters with their types and #' number of dimensions. Typically the output of model$variables(). #' @return A character vector of file paths. -process_init_function <- function(init, num_procs, model_variables = NULL) { +process_init.function <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { args <- formals(init) if (is.null(args)) { fn_test <- init() @@ -1116,9 +1215,158 @@ process_init_function <- function(init, num_procs, model_variables = NULL) { if (!is.list(fn_test) || is.data.frame(fn_test)) { stop("If 'init' is a function it must return a single list.") } - process_init_list(init_list, num_procs, model_variables) + process_init(init_list, num_procs, model_variables) +} + +#' Write initial values to files if provided as a `CmdStanMCMC` class +#' @noRd +#' @param init A `CmdStanMCMC` class +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.CmdStanMCMC <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + # Convert from data.table to data.frame + if (init$return_codes() == 1) { + stop("We are unable to create initial values from a model with no samples. Please check the results of the model used for inits before continuing.") + } else if (!any(names(model_variables$parameters) %in% init$metadata()$stan_variables)) { + stop("None of the names of the parameters for the model used for initial values match the names of parameters from the model currently running.") + } + draws_df = init$draws(format = "df") + if (is.null(model_variables)) { + model_variables = list(parameters = colnames(draws_df)[2:(length(colnames(draws_df)) - 3)]) + } + init_draws_df = posterior::resample_draws(draws_df, ndraws = num_procs, + method = "simple_no_replace") + init_draws_lst = process_init(init_draws_df, + num_procs = num_procs, model_variables = model_variables) + return(init_draws_lst) } +#' Performs PSIS resampling on the draws from an approxmation method for inits. +#' @noRd +#' @param init A set of draws with `lp__` and `lp_approx__` columns. +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init_approx <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + # Convert from data.table to data.frame + if (init$return_codes() == 1) { + stop("We are unable to create initial values from a model with no samples. Please check the results of the model used for inits before continuing.") + } else if (!any(names(model_variables$parameters) %in% init$metadata()$stan_variables)) { + stop("None of the names of the parameters for the model used for initial values match the names of parameters from the model currently running.") + } + draws_df = init$draws(format = "df") + if (is.null(model_variables)) { + model_variables = list(parameters = colnames(draws_df)[3:(length(colnames(draws_df)) - 3)]) + } + draws_df$lw = draws_df$lp__ - draws_df$lp_approx__ + # Calculate unique draws based on 'lw' using base R functions + unique_draws = length(unique(draws_df$lw)) + if (num_procs > unique_draws) { + stop(paste0("Not enough distinct draws (", num_procs, ") to create inits.")) + } + if (unique_draws < (0.95 * nrow(draws_df))) { + temp_df = aggregate(.draw ~ lw, data = draws_df, FUN = min) + draws_df = posterior::as_draws_df(merge(temp_df, draws_df, by = 'lw')) + draws_df$pareto_weight = exp(draws_df$lw - max(draws_df$lw)) + } else { + draws_df$pareto_weight = posterior::pareto_smooth( + exp(draws_df$lw - max(draws_df$lw)), tail = "right")[["x"]] + } + init_draws_df = posterior::resample_draws(draws_df, ndraws = num_procs, + weights = draws_df$pareto_weight, method = "simple_no_replace") + init_draws_lst = process_init(init_draws_df, + num_procs = num_procs, model_variables = model_variables, warn_partial) + return(init_draws_lst) +} + + +#' Write initial values to files if provided as a `CmdStanPathfinder` class +#' @noRd +#' @param init A `CmdStanPathfinder` class +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.CmdStanPathfinder <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + process_init_approx(init, num_procs, model_variables, warn_partial) +} + +#' Write initial values to files if provided as a `CmdStanVB` class +#' @noRd +#' @param init A `CmdStanVB` class +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.CmdStanVB <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + process_init_approx(init, num_procs, model_variables, warn_partial) +} + +#' Write initial values to files if provided as a `CmdStanLaplace` class +#' @noRd +#' @param init A `CmdStanLaplace` class +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.CmdStanLaplace <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + process_init_approx(init, num_procs, model_variables, warn_partial) +} + + +#' Write initial values to files if provided as a `CmdStanMLE` class +#' @noRd +#' @param init A `CmdStanMLE` class +#' @param num_procs Number of inits requested +#' @param model_variables A list of all parameters with their types and +#' number of dimensions. Typically the output of model$variables(). +#' @param warn_partial Should a warning be thrown if inits are only specified +#' for a subset of parameters? Can be controlled by global option +#' `cmdstanr_warn_inits`. +#' @return A character vector of file paths. +process_init.CmdStanMLE <- function(init, num_procs, model_variables = NULL, + warn_partial = getOption("cmdstanr_warn_inits", TRUE)) { + # Convert from data.table to data.frame + if (init$return_codes() == 1) { + stop("We are unable to create initial values from a model with no samples. Please check the results of the model used for inits before continuing.") + } else if (!any(names(model_variables$parameters) %in% init$metadata()$stan_variables)) { + stop("None of the names of the parameters for the model used for initial values match the names of parameters from the model currently running.") + } + draws_df = init$draws(format = "df") + if (is.null(model_variables)) { + model_variables = list(parameters = colnames(draws_df)[2:(length(colnames(draws_df)) - 3)]) + } + init_draws_df = posterior::resample_draws(draws_df, ndraws = num_procs, + method = "simple") + init_draws_lst_lst = process_init(init_draws_df, + num_procs = num_procs, model_variables = model_variables, warn_partial) + return(init_draws_lst_lst) +} + + #' Validate initial values #' #' For CmdStan `init` must be `NULL`, a single real number >= 0, or paths to @@ -1129,6 +1377,7 @@ process_init_function <- function(init, num_procs, model_variables = NULL) { #' @param num_procs Number of CmdStan processes (number of chains if MCMC) #' @return Either throws an error or returns `invisible(TRUE)`. validate_init <- function(init, num_procs) { + if (is.null(init)) { return(invisible(TRUE)) } @@ -1184,8 +1433,8 @@ validate_seed <- function(seed, num_procs) { } checkmate::assert_integerish(seed, lower = lower_seed) if (length(seed) > 1 && length(seed) != num_procs) { - stop("If 'seed' is specified it must be a single integer or one per chain.", - call. = FALSE) + stop("If 'seed' is specified it must be a single integer or one per generated quantities chain.", + call. = FALSE) } invisible(TRUE) } @@ -1197,9 +1446,11 @@ validate_seed <- function(seed, num_procs) { #' @return An integer vector of length `num_procs`. maybe_generate_seed <- function(seed, num_procs) { if (is.null(seed)) { - seed <- base::rep(base::sample(.Machine$integer.max, 1), num_procs) + seed <- base::sample(.Machine$integer.max, num_procs) } else if (length(seed) == 1 && num_procs > 1) { - seed <- base::rep(as.integer(seed), num_procs) + seed <- rep(as.integer(seed), num_procs) + } else if (length(seed) != num_procs) { + stop("If 'seed' is specified as a vector it must have length 1 or number of chains.") } seed } diff --git a/R/cmdstanr-package.R b/R/cmdstanr-package.R index 305241bf..d1f34bf1 100644 --- a/R/cmdstanr-package.R +++ b/R/cmdstanr-package.R @@ -30,6 +30,6 @@ #' @inherit cmdstan_model examples #' @import R6 #' -NULL +"_PACKAGE" if (getRversion() >= "2.15.1") utils::globalVariables(c("self", "private", "super")) diff --git a/R/csv.R b/R/csv.R index f2e674f8..e2087976 100644 --- a/R/csv.R +++ b/R/csv.R @@ -166,10 +166,13 @@ read_cmdstan_csv <- function(files, if (!is.null(csv_metadata[[1]]$time)) { time <- rbind(time, csv_metadata[[1]]$time) } + # Cmdstan Bug #1257, id is wrong so we need to assume it's id..N + # Since cmdstan and cmdstanr assume file ids are sequential this should be fine if (length(csv_metadata) > 1) { + id_vals = id + 1:length(csv_metadata) for (file_id in 2:length(csv_metadata)) { file_metadata <- csv_metadata[[file_id]] - id <- file_metadata$id + id <- id_vals[file_id - 1] csv_metadata[[1]]$id <- c(csv_metadata[[1]]$id, id) csv_metadata[[1]]$seed <- c(csv_metadata[[1]]$seed, file_metadata$seed) csv_metadata[[1]]$init <- c(csv_metadata[[1]]$init, file_metadata$init) @@ -362,6 +365,8 @@ read_cmdstan_csv <- function(files, } else { post_warmup_sampler_diagnostics <- NULL } + # Fix: Cmdstan Bug #1257 delete once fixed + time$chain_id = seq_len(nrow(time)) list( metadata = metadata, time = list(total = NA_integer_, chains = time), @@ -922,6 +927,7 @@ unrepair_variable_names <- function(names) { names } +#' @export remaining_columns_to_read <- function(requested, currently_read, all) { if (is.null(requested)) { if (is.null(all)) { diff --git a/R/fit.R b/R/fit.R index 99feca4c..4ffe3bfa 100644 --- a/R/fit.R +++ b/R/fit.R @@ -40,7 +40,7 @@ CmdStanFit <- R6::R6Class( invisible(self) }, num_procs = function() { - self$runset$num_procs() + self$runset$args$num_inner_processes() }, print = function(variables = NULL, ..., digits = 2, max_rows = getOption("cmdstanr_max_rows", 10)) { if (is.null(private$draws_) && @@ -518,11 +518,11 @@ unconstrain_variables <- function(variables) { " not provided!", call. = FALSE) } - # Remove zero-length parameters from model_variables, otherwise process_init_list + # Remove zero-length parameters from model_variables, otherwise process_init # warns about missing inputs model_variables$parameters <- model_variables$parameters[nonzero_length_params] - stan_pars <- process_init_list(list(variables), num_procs = 1, model_variables) + stan_pars <- process_init(list(variables), num_procs = 1, model_variables) private$model_methods_env_$unconstrain_variables(private$model_methods_env_$model_ptr_, stan_pars) } CmdStanFit$set("public", name = "unconstrain_variables", value = unconstrain_variables) @@ -594,7 +594,7 @@ unconstrain_draws <- function(files = NULL, draws = NULL, # but not in metadata()$variables nonzero_length_params <- names(model_variables$parameters) %in% model_par_names - # Remove zero-length parameters from model_variables, otherwise process_init_list + # Remove zero-length parameters from model_variables, otherwise process_init # warns about missing inputs pars <- names(model_variables$parameters[nonzero_length_params]) @@ -1347,11 +1347,7 @@ CmdStanMCMC <- R6::R6Class( }, # override the CmdStanFit output method output = function(id = NULL) { - if (is.null(id)) { self$runset$procs$proc_output() - } else { - cat(paste(self$runset$procs$proc_output(id), collapse = "\n")) - } }, # override the CmdStanFit draws method @@ -1767,7 +1763,7 @@ CmdStanMCMC$set("public", name = "inv_metric", value = inv_metric) #' } #' num_chains <- function() { - super$num_procs() + self$runset$args$method_args$chains } CmdStanMCMC$set("public", name = "num_chains", value = num_chains) diff --git a/R/model.R b/R/model.R index 289558f0..3644910b 100644 --- a/R/model.R +++ b/R/model.R @@ -63,7 +63,7 @@ #' data = stan_data, #' seed = 123, #' chains = 2, -#' parallel_chains = 2 +#' threads = 2 #' ) #' #' # Use 'posterior' package for summaries @@ -341,6 +341,9 @@ CmdStanModel <- R6::R6Class( "- ", new_hpp_loc) private$hpp_file_ <- new_hpp_loc invisible(private$hpp_file_) + }, + threads_enabled = function() { + return(as.logical(private$cpp_options_[["STAN_THREADS"]])) } ) ) @@ -414,7 +417,6 @@ CmdStanModel <- R6::R6Class( #' [`$expose_functions()`][model-method-expose_functions] method. #' @param dry_run (logical) If `TRUE`, the code will do all checks before compilation, #' but skip the actual C++ compilation. Used to speedup tests. -#' #' @param threads Deprecated and will be removed in a future release. Please #' turn on threading via `cpp_options = list(stan_threads = TRUE)` instead. #' @@ -461,7 +463,7 @@ compile <- function(quiet = TRUE, pedantic = FALSE, include_paths = NULL, user_header = NULL, - cpp_options = list(), + cpp_options = list(stan_threads = os_use_single_process()), stanc_options = list(), force_recompile = getOption("cmdstanr_force_recompile", default = FALSE), compile_model_methods = FALSE, @@ -1127,9 +1129,8 @@ sample <- function(data = NULL, output_basename = NULL, sig_figs = NULL, chains = 4, - parallel_chains = getOption("mc.cores", 1), chain_ids = seq_len(chains), - threads_per_chain = NULL, + threads = getOption("mc.cores", 1), opencl_ids = NULL, iter_warmup = NULL, iter_sampling = NULL, @@ -1155,18 +1156,24 @@ sample <- function(data = NULL, num_chains = NULL, num_warmup = NULL, num_samples = NULL, + threads_per_chain = NULL, + parallel_chains = NULL, validate_csv = NULL, save_extra_diagnostics = NULL, max_depth = NULL, stepsize = NULL) { # temporary deprecation warnings if (!is.null(cores)) { - warning("'cores' is deprecated. Please use 'parallel_chains' instead.") - parallel_chains <- cores + warning("'cores' is deprecated. Please use 'threads' instead.") + threads <- cores } if (!is.null(num_cores)) { - warning("'num_cores' is deprecated. Please use 'parallel_chains' instead.") - parallel_chains <- num_cores + warning("'num_cores' is deprecated. Please use 'threads' instead.") + threads <- num_cores + } + if (!is.null(parallel_chains)) { + warning("'parallel_chains' is deprecated. Please use 'threads' instead.") + threads <- parallel_chains } if (!is.null(num_chains)) { warning("'num_chains' is deprecated. Please use 'chains' instead.") @@ -1213,10 +1220,19 @@ sample <- function(data = NULL, if (fixed_param) { save_warmup <- FALSE } + if (self$threads_enabled()) { + num_procs = 1 + parallel_procs = 1 + threads_per_proc = threads + } else { + num_procs = chains + parallel_procs = chains + threads_per_proc = as.integer(threads / chains) + } procs <- CmdStanMCMCProcs$new( - num_procs = checkmate::assert_integerish(chains, lower = 1, len = 1), - parallel_procs = checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE), - threads_per_proc = assert_valid_threads(threads_per_chain, self$cpp_options(), multiple_chains = TRUE), + num_procs = num_procs, + parallel_procs = parallel_procs, + threads_per_proc = assert_valid_threads(threads_per_proc, self$cpp_options(), multiple_chains = TRUE), show_stderr_messages = show_exceptions, show_stdout_messages = show_messages ) @@ -1240,7 +1256,8 @@ sample <- function(data = NULL, term_buffer = term_buffer, window = window, fixed_param = fixed_param, - diagnostics = diagnostics + diagnostics = diagnostics, + chains = chains ) args <- CmdStanArgs$new( method_args = sample_args, @@ -1260,7 +1277,8 @@ sample <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + threads = threads_per_proc ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1342,6 +1360,7 @@ sample_mpi <- function(data = NULL, iter_sampling = NULL, save_warmup = FALSE, thin = NULL, + threads = NULL, max_treedepth = NULL, adapt_engaged = TRUE, adapt_delta = NULL, @@ -1375,9 +1394,19 @@ sample_mpi <- function(data = NULL, chains <- 1 save_warmup <- FALSE } + if (self$threads_enabled()) { + num_procs = 1 + parallel_procs = 1 + threads_per_proc = threads + } else { + num_procs = chains + parallel_procs = chains + threads_per_proc = as.integer(threads / num_chains) + } procs <- CmdStanMCMCProcs$new( - num_procs = checkmate::assert_integerish(chains, lower = 1, len = 1), - parallel_procs = 1, + num_procs = num_procs, + parallel_procs = parallel_procs, + threads_per_proc = assert_valid_threads(threads_per_proc, self$cpp_options(), multiple_chains = TRUE), show_stderr_messages = show_exceptions, show_stdout_messages = show_messages ) @@ -1420,7 +1449,8 @@ sample_mpi <- function(data = NULL, output_dir = output_dir, output_basename = output_basename, sig_figs = sig_figs, - model_variables = model_variables + model_variables = model_variables, + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan_mpi(mpi_cmd, mpi_args) @@ -1447,10 +1477,6 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' metadata of an example model, e.g., #' `cmdstanr_example(method="optimize")$metadata()`. #' @template model-common-args -#' @param threads (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). #' @param iter (positive integer) The maximum number of iterations. #' @param algorithm (string) The optimization algorithm. One of `"lbfgs"`, #' `"bfgs"`, or `"newton"`. The control parameters below are only available @@ -1501,11 +1527,14 @@ optimize <- function(data = NULL, history_size = NULL, show_messages = TRUE, show_exceptions = TRUE) { + if (self$threads_enabled() && is.null(threads)) { + threads <- 1 + } procs <- CmdStanProcs$new( num_procs = 1, + threads_per_proc = assert_valid_threads(threads, self$cpp_options(), multiple_chains = TRUE), show_stderr_messages = show_exceptions, - show_stdout_messages = show_messages, - threads_per_proc = assert_valid_threads(threads, self$cpp_options()) + show_stdout_messages = show_messages ) model_variables <- NULL if (is_variables_method_supported(self)) { @@ -1541,7 +1570,8 @@ optimize <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1571,7 +1601,6 @@ CmdStanModel$set("public", name = "optimize", value = optimize) #' installed version of CmdStan. #' #' @template model-common-args -#' @inheritParams model-method-optimize #' @param save_latent_dynamics Ignored for this method. #' @param mode (multiple options) The mode to center the approximation at. One #' of the following: @@ -1639,11 +1668,14 @@ laplace <- function(data = NULL, if (!is.null(mode) && !is.null(opt_args)) { stop("Cannot specify both 'opt_args' and 'mode' arguments.", call. = FALSE) } + if (self$threads_enabled() && is.null(threads)) { + threads <- 1 + } procs <- CmdStanProcs$new( num_procs = 1, + threads_per_proc = assert_valid_threads(threads, self$cpp_options(), multiple_chains = TRUE), show_stderr_messages = show_exceptions, - show_stdout_messages = show_messages, - threads_per_proc = assert_valid_threads(threads, self$cpp_options()) + show_stdout_messages = show_messages ) model_variables <- NULL if (is_variables_method_supported(self)) { @@ -1706,7 +1738,8 @@ laplace <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1735,10 +1768,6 @@ CmdStanModel$set("public", name = "laplace", value = laplace) #' installed version of CmdStan. #' #' @template model-common-args -#' @param threads (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections (e.g., when using the Stan -#' functions `reduce_sum()` or `map_rect()`). #' @param algorithm (string) The algorithm. Either `"meanfield"` or #' `"fullrank"`. #' @param iter (positive integer) The _maximum_ number of iterations. @@ -1790,8 +1819,7 @@ variational <- function(data = NULL, procs <- CmdStanProcs$new( num_procs = 1, show_stderr_messages = show_exceptions, - show_stdout_messages = show_messages, - threads_per_proc = assert_valid_threads(threads, self$cpp_options()) + show_stdout_messages = show_messages ) model_variables <- NULL if (is_variables_method_supported(self)) { @@ -1827,7 +1855,8 @@ variational <- function(data = NULL, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -1861,10 +1890,6 @@ CmdStanModel$set("public", name = "variational", value = variational) #' installed version of CmdStan #' #' @template model-common-args -#' @param num_threads (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections (e.g., for multi-path pathfinder -#' as well as `reduce_sum`). #' @param init_alpha (positive real) The initial step size parameter. #' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. #' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. @@ -1912,7 +1937,7 @@ pathfinder <- function(data = NULL, output_basename = NULL, sig_figs = NULL, opencl_ids = NULL, - num_threads = NULL, + threads = NULL, init_alpha = NULL, tol_obj = NULL, tol_rel_obj = NULL, @@ -1930,11 +1955,14 @@ pathfinder <- function(data = NULL, calculate_lp = NULL, show_messages = TRUE, show_exceptions = TRUE) { + if (self$threads_enabled() && is.null(threads)) { + threads <- 1 + } procs <- CmdStanProcs$new( num_procs = 1, + threads_per_proc = assert_valid_threads(threads, self$cpp_options(), multiple_chains = TRUE), show_stderr_messages = show_exceptions, - show_stdout_messages = show_messages, - threads_per_proc = assert_valid_threads(num_threads, self$cpp_options()) + show_stdout_messages = show_messages ) model_variables <- NULL if (is_variables_method_supported(self)) { @@ -1976,7 +2004,7 @@ pathfinder <- function(data = NULL, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), model_variables = model_variables, - num_threads = num_threads + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() @@ -2064,13 +2092,17 @@ generate_quantities <- function(fitted_params, output_basename = NULL, sig_figs = NULL, parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL, + threads = NULL, opencl_ids = NULL) { fitted_params_files <- process_fitted_params(fitted_params) + if (self$threads_enabled() && is.null(threads)) { + threads <- 1 + } procs <- CmdStanGQProcs$new( num_procs = length(fitted_params_files), - parallel_procs = checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE), - threads_per_proc = assert_valid_threads(threads_per_chain, self$cpp_options(), multiple_chains = TRUE) + parallel_procs = checkmate::assert_integerish(parallel_chains, lower = 1, + null.ok = TRUE), + threads_per_proc = assert_valid_threads(threads, self$cpp_options(), multiple_chains = TRUE), ) model_variables <- NULL if (is_variables_method_supported(self)) { @@ -2092,7 +2124,8 @@ generate_quantities <- function(fitted_params, output_basename = output_basename, sig_figs = sig_figs, opencl_ids = assert_valid_opencl(opencl_ids, self$cpp_options()), - model_variables = model_variables + model_variables = model_variables, + threads = threads ) runset <- CmdStanRun$new(args, procs) runset$run_cmdstan() diff --git a/R/run.R b/R/run.R index 9bce6c3e..2bd81cc9 100644 --- a/R/run.R +++ b/R/run.R @@ -1,5 +1,39 @@ # CmdStanRun -------------------------------------------------------------- +#' Parse a file from end to beginning looking for the elapsed time. +#' @param file A file path. +#' @return A list of numeric warmup, sampling, and total times. +#' @noRd +.sample_file_time = function(file) { + bufferSize <- 256 + size <- file.info(file)$size + + if (size < bufferSize) { + bufferSize <- size + } + + pos <- size - bufferSize + text <- character() + k <- 0L + + f <- file(file, "rb") + on.exit(close(f)) + while(pos != 0L) { + seek(f, where=pos) + chars <- readChar(f, nchars=bufferSize) + text <- c(chars, text) + if(grep("Elapsed Time:", chars)) { + # Cut the buffer to get everything after Elapsed Time: and pull out the times + times <- strsplit(chars, "Elapsed Time: ")[[1]][2] + raw_times = as.numeric(stringr::str_extract_all(times, "[0-9\\.]+")[[1]]) + return(list(warmup = raw_times[1], sampling = raw_times[2], total = raw_times[3])) + } + k <- k + length(gregexpr(pattern="\\n", text=chars)[[1L]]) + pos <- max(pos-bufferSize, 0L) + } + stop(paste0("File: ", file, " did not contain the elapsed time for the model!")) +} + #' Run CmdStan using a specified configuration #' #' The internal `CmdStanRun` R6 class handles preparing the call to CmdStan @@ -22,12 +56,28 @@ CmdStanRun <- R6::R6Class( checkmate::assert_r6(procs, classes = "CmdStanProcs") self$args <- args self$procs <- procs - private$output_files_ <- self$new_output_files() + output_files = self$new_output_files() + private$output_file_base_ = output_files[[1]] + # For these the output file names end in _1 + if (inherits(args$method_args, c("SampleArgs", "GenerateQuantitiesArgs")) && + args$num_inner_processes() > 1) { + private$output_files_ <- output_files[[2]] + } else { + private$output_files_ <- output_files[[1]] + } if (cmdstan_version() >= "2.26.0") { - private$profile_files_ <- self$new_profile_files() + private$profile_files_ <- self$new_profile_files()[[1]] } if (self$args$save_latent_dynamics) { - private$latent_dynamics_files_ <- self$new_latent_dynamics_files() + latent_dynamics_files = self$new_latent_dynamics_files() + private$latent_dynamics_file_base_ <- latent_dynamics_files[[1]] + if (inherits(args$method_args, + c("SampleArgs", "GenerateQuantitiesArgs")) && + args$num_inner_processes() > 1) { + private$latent_dynamics_files_ <- latent_dynamics_files[[2]] + } else { + private$latent_dynamics_files_ <- latent_dynamics_files[[1]] + } } if (os_is_wsl()) { # While the executable built under WSL will be stored in the Windows @@ -108,7 +158,7 @@ CmdStanRun <- R6::R6Class( current_paths = current_files, new_dir = dir, new_basename = basename %||% self$model_name(), - ids = self$procs$proc_ids(), + ids = seq_len(self$args$num_inner_processes()), ext = ".csv", timestamp = timestamp, random = random @@ -133,7 +183,7 @@ CmdStanRun <- R6::R6Class( current_paths = current_files, new_dir = dir, new_basename = paste0(basename %||% self$model_name(), "-diagnostic"), - ids = self$proc_ids(), + ids = seq_len(self$args$num_inner_processes()), ext = ".csv", timestamp = timestamp, random = random @@ -158,7 +208,7 @@ CmdStanRun <- R6::R6Class( current_paths = current_files, new_dir = dir, new_basename = paste0(basename %||% self$model_name(), "-profile"), - ids = self$proc_ids(), + ids = NULL, ext = ".csv", timestamp = timestamp, random = random @@ -197,18 +247,22 @@ CmdStanRun <- R6::R6Class( }, command = function() self$args$command(), - command_args = function() { - if (!length(private$command_args_)) { - # create a list of character vectors (one per run/chain) of cmdstan arguments - private$command_args_ <- lapply(self$procs$proc_ids(), function(j) { - self$args$compose_all_args( - idx = j, - output_file = private$output_files_[j], - profile_file = private$profile_files_[j], - latent_dynamics_file = private$latent_dynamics_files_[j] # maybe NULL - ) - }) + command_args = function(id = 1) { + # create a list of character vectors (one per run/chain) of cmdstan arguments + if (self$procs$num_procs() > 1) { + output_file = private$output_files_[id] + latent_dynamic_file = private$latent_dynamics_files_[id] + } else { + output_file = private$output_file_base_ + latent_dynamic_file = private$latent_dynamics_file_base_ } + # Cmdstan Bug, once idx is respected for multiple chains make this the actual id + private$command_args_ <- self$args$compose_all_args( + idx = id, + output_file = output_file, + profile_file = private$profile_files_, + latent_dynamics_file = latent_dynamic_file # maybe NULL + ) private$command_args_ }, @@ -269,25 +323,39 @@ CmdStanRun <- R6::R6Class( chain_id = self$procs$proc_ids()[self$procs$is_finished()], total = self$procs$proc_total_time()[self$procs$is_finished()] ) - time <- list(total = self$procs$total_time(), chains = chain_time) } else { - chain_ids <- names(self$procs$is_finished()) - chain_time <- data.frame( - chain_id = as.vector(self$procs$proc_ids()), - warmup = as.vector(self$procs$proc_section_time("warmup")), - sampling = as.vector(self$procs$proc_section_time("sampling")), - total = as.vector(self$procs$proc_total_time()[chain_ids]) - ) - time <- list(total = self$procs$total_time(), chains = chain_time) + if (self$procs$is_finished() == 1) { + chain_ids <- seq_len(self$args$num_inner_processes()) + output_files = self$output_files(include_failed = TRUE) + output_times = lapply(output_files, .sample_file_time) + chain_time <- data.frame( + chain_id = chain_ids, + warmup = unlist(lapply(output_times, function(x) x$warmup)), + sampling = unlist(lapply(output_times, function(x) x$sampling)), + total = unlist(lapply(output_times, function(x) x$total)) + ) + time <- list(total = self$procs$total_time(), chains = chain_time) + } else { + chain_ids <- names(self$procs$is_finished()) + chain_time <- data.frame( + chain_id = as.vector(self$procs$proc_ids()), + warmup = as.vector(self$procs$proc_section_time("warmup")), + sampling = as.vector(self$procs$proc_section_time("sampling")), + total = as.vector(self$procs$proc_total_time()[chain_ids]) + ) + time <- list(total = self$procs$total_time(), chains = chain_time) + } } time } ), private = list( + output_file_base_ = character(), output_files_ = character(), profile_files_ = NULL, output_files_saved_ = FALSE, + latent_dynamics_file_base_ = NULL, latent_dynamics_files_ = NULL, latent_dynamics_files_saved_ = FALSE, profile_files_saved_ = FALSE, @@ -340,72 +408,64 @@ check_target_exe <- function(exe) { } mpi_args[["exe"]] <- wsl_safe_path(self$exe_file()) } - if (procs$num_procs() == 1) { - start_msg <- "Running MCMC with 1 chain" - } else if (procs$num_procs() == procs$parallel_procs()) { - start_msg <- paste0("Running MCMC with ", procs$num_procs(), " parallel chains") + start_msg <- paste0("Running MCMC with ", self$args$method_args$chains) + if (self$args$method_args$chains > 1) { + start_msg <- paste0(start_msg, " chains") } else { - if (procs$parallel_procs() == 1) { - if (!is.null(mpi_cmd)) { - if (!is.null(mpi_args[["n"]])) { - mpi_n_process <- mpi_args[["n"]] - } else if (!is.null(mpi_args[["np"]])) { - mpi_n_process <- mpi_args[["np"]] - } - if (is.null(mpi_n_process)) { - start_msg <- paste0("Running MCMC with ", procs$num_procs(), " chains using MPI") - } else { - start_msg <- paste0("Running MCMC with ", procs$num_procs(), " chains using MPI with ", mpi_n_process, " processes") - } - } else { - start_msg <- paste0("Running MCMC with ", procs$num_procs(), " sequential chains") - } - } else { - start_msg <- paste0("Running MCMC with ", procs$num_procs(), " chains, at most ", procs$parallel_procs(), " in parallel") - } + start_msg <- paste0(start_msg, " chain") } - if (is.null(procs$threads_per_proc())) { - if (procs$show_stdout_messages()) { - cat(paste0(start_msg, "...\n\n")) - } + start_msg <- paste0(start_msg, " and ", self$args$num_threads) + if (self$args$num_threads > 1) { + start_msg <- paste0(start_msg, " threads") } else { - if (procs$show_stdout_messages()) { - cat(paste0(start_msg, ", with ", procs$threads_per_proc(), " thread(s) per chain...\n\n")) + start_msg <- paste0(start_msg, " thread") + } + if (!is.null(mpi_cmd)) { + if (!is.null(mpi_args[["n"]])) { + mpi_n_process <- mpi_args[["n"]] + } else if (!is.null(mpi_args[["np"]])) { + mpi_n_process <- mpi_args[["np"]] } - Sys.setenv("STAN_NUM_THREADS" = as.integer(procs$threads_per_proc())) - # Windows environment variables have to be explicitly exported to WSL - if (os_is_wsl()) { - Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") + if (is.null(mpi_n_process)) { + start_msg <- paste0(start_msg, " using MPI") + } else { + start_msg <- paste0(start_msg, " using MPI with ", mpi_n_process, " processes") } } + if (procs$show_stdout_messages()) { + cat(paste0(start_msg, "...\n\n")) + } + # We can always set these as non threaded programs will ignore them + Sys.setenv("STAN_NUM_THREADS" = as.integer(self$args$num_threads)) + # Windows environment variables have to be explicitly exported to WSL + if (os_is_wsl()) { + Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") + } start_time <- Sys.time() - chains <- procs$proc_ids() - chain_ind <- 1 + chain_id <- 1 while (!all(procs$is_finished() | procs$is_failed())) { while (procs$active_procs() != procs$parallel_procs() && procs$any_queued()) { - chain_id <- chains[chain_ind] procs$new_proc( id = chain_id, command = self$command(), - args = self$command_args()[[chain_id]], + args = self$command_args(chain_id), wd = dirname(self$exe_file()), mpi_cmd = mpi_cmd, mpi_args = mpi_args ) procs$mark_proc_start(chain_id) procs$set_active_procs(procs$active_procs() + 1) - chain_ind <- chain_ind + 1 + chain_id <- chain_id + 1 } start_active_procs <- procs$active_procs() - while (procs$active_procs() == start_active_procs && procs$active_procs() > 0) { procs$wait(0.1) procs$poll(0) - for (chain_id in chains) { - if (!procs$is_queued(chain_id)) { - procs$process_output(chain_id) - procs$process_error_output(chain_id) + for (chain_iter in seq_len(chain_id)) { + if (!procs$is_queued(chain_iter)) { + procs$process_output(chain_iter) + procs$process_error_output(chain_iter) } } procs$set_active_procs(procs$num_alive()) @@ -420,30 +480,20 @@ CmdStanRun$set("private", name = "run_sample_", value = .run_sample) .run_generate_quantities <- function() { procs <- self$procs on.exit(procs$cleanup(), add = TRUE) - if (procs$num_procs() == 1) { - start_msg <- "Running standalone generated quantities after 1 MCMC chain" - } else if (procs$num_procs() == procs$parallel_procs()) { - start_msg <- paste0("Running standalone generated quantities after ", procs$num_procs(), " MCMC chains, all chains in parallel ") + start_msg <- "Running standalone generated quantities with " + start_msg <- paste0(start_msg, self$args$num_threads) + if (self$args$num_threads > 1) { + start_msg <- paste0(start_msg, " threads") } else { - if (procs$parallel_procs() == 1) { - start_msg <- paste0("Running standalone generated quantities after ", procs$num_procs(), " MCMC chains, 1 chain at a time ") - } else { - start_msg <- paste0("Running standalone generated quantities after ", procs$num_procs(), " MCMC chains, ", procs$parallel_procs(), " chains at a time ") - } + start_msg <- paste0(start_msg, " thread") } - if (is.null(procs$threads_per_proc())) { - if (procs$show_stdout_messages()) { - cat(paste0(start_msg, "...\n\n")) - } - } else { - if (procs$show_stdout_messages()) { - cat(paste0(start_msg, ", with ", procs$threads_per_proc(), " thread(s) per chain...\n\n")) - } - Sys.setenv("STAN_NUM_THREADS" = as.integer(procs$threads_per_proc())) - # Windows environment variables have to be explicitly exported to WSL - if (os_is_wsl()) { - Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") - } + if (procs$show_stdout_messages()) { + cat(paste0(start_msg, "...\n\n")) + } + Sys.setenv("STAN_NUM_THREADS" = as.integer(self$args$num_threads)) + # Windows environment variables have to be explicitly exported to WSL + if (os_is_wsl()) { + Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") } start_time <- Sys.time() chains <- procs$proc_ids() @@ -454,7 +504,7 @@ CmdStanRun$set("private", name = "run_sample_", value = .run_sample) procs$new_proc( id = chain_id, command = self$command(), - args = self$command_args()[[chain_id]], + args = self$command_args(chain_id), wd = dirname(self$exe_file()) ) procs$mark_proc_start(chain_id) @@ -484,19 +534,18 @@ CmdStanRun$set("private", name = "run_generate_quantities_", value = .run_genera .run_other <- function() { procs <- self$procs - if (!is.null(procs$threads_per_proc())) { - Sys.setenv("STAN_NUM_THREADS" = as.integer(procs$threads_per_proc())) - # Windows environment variables have to be explicitly exported to WSL - if (os_is_wsl()) { - Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") - } + # We can always set these as non threaded programs will ignore them + Sys.setenv("STAN_NUM_THREADS" = as.integer(self$args$num_threads)) + # Windows environment variables have to be explicitly exported to WSL + if (os_is_wsl()) { + Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") } start_time <- Sys.time() id <- 1 procs$new_proc( id = id, command = self$command(), - args = self$command_args()[[id]], + args = self$command_args(), wd = dirname(self$exe_file()) ) procs$set_active_procs(1) @@ -532,7 +581,7 @@ CmdStanRun$set("private", name = "run_generate_quantities_", value = .run_genera } procs$mark_proc_stop(id) procs$set_total_time(as.double((Sys.time() - start_time), units = "secs")) - procs$report_time() + procs$report_time(id) } CmdStanRun$set("private", name = "run_optimize_", value = .run_other) CmdStanRun$set("private", name = "run_laplace_", value = .run_other) @@ -541,16 +590,14 @@ CmdStanRun$set("private", name = "run_pathfinder_", value = .run_other) .run_diagnose <- function() { procs <- self$procs - if (!is.null(procs$threads_per_proc())) { - Sys.setenv("STAN_NUM_THREADS" = as.integer(procs$threads_per_proc())) - # Windows environment variables have to be explicitly exported to WSL - if (os_is_wsl()) { - Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") - } + # We can always set these as non threaded programs will ignore them + Sys.setenv("STAN_NUM_THREADS" = as.integer(self$args$num_threads)) + # Windows environment variables have to be explicitly exported to WSL + if (os_is_wsl()) { + Sys.setenv("WSLENV"="STAN_NUM_THREADS/u") } stdout_file <- tempfile() stderr_file <- tempfile() - withr::with_path( c( toolchain_PATH_env_var(), @@ -558,7 +605,7 @@ CmdStanRun$set("private", name = "run_pathfinder_", value = .run_other) ), ret <- wsl_compatible_run( command = self$command(), - args = self$command_args()[[1]], + args = self$command_args(), wd = dirname(self$exe_file()), stderr = stderr_file, stdout = stdout_file, @@ -596,8 +643,6 @@ CmdStanRun$set("private", name = "run_diagnose_", value = .run_diagnose) #' to 1. #' @param parallel_procs The maximum number of processes to run in parallel. #' Currently for non-sampling this must be set to 1. -#' @param threads_per_proc The number of threads to use per process to run -#' parallel sections of model. #' CmdStanProcs <- R6::R6Class( classname = "CmdStanProcs", @@ -616,9 +661,8 @@ CmdStanProcs <- R6::R6Class( } else { private$parallel_procs_ <- as.integer(parallel_procs) } - private$threads_per_proc_ <- as.integer(threads_per_proc) - private$threads_per_proc_ <- threads_per_proc private$active_procs_ <- 0 + private$threads_per_proc_ <- as.integer(threads_per_proc) private$proc_ids_ <- seq_len(num_procs) zeros <- rep(0, num_procs) names(zeros) <- private$proc_ids_ diff --git a/R/utils.R b/R/utils.R index 9764ec36..c670ef89 100644 --- a/R/utils.R +++ b/R/utils.R @@ -54,6 +54,10 @@ os_is_linux <- function() { isTRUE(Sys.info()[["sysname"]] == "Linux") } +os_use_single_process <- function() { + return(os_is_wsl() || os_is_linux()); +} + is_rtools43_toolchain <- function() { os_is_windows() && R.version$major == "4" && R.version$minor >= "3.0" } @@ -217,15 +221,14 @@ generate_file_names <- stamp <- base::format(Sys.time(), "%Y%m%d%H%M") new_names <- paste0(new_names, "-", stamp) } - if (!is.null(ids)) { - new_names <- paste0(new_names, "-", ids) - } if (random) { rand_num_pid <- as.integer(stats::runif(1, min = 0, max = 1E7)) + Sys.getpid() rand <- base::format(as.hexmode(rand_num_pid), width = 6) new_names <- paste0(new_names, "-", rand) } - + if (!is.null(ids)) { + new_names <- paste0(new_names, "_", ids) + } if (length(ext)) { ext <- if (startsWith(ext, ".")) ext else paste0(".", ext) new_names <- paste0(new_names, ext) diff --git a/cmdstanr.Rproj b/cmdstanr.Rproj index 270314b8..a55289d7 100644 --- a/cmdstanr.Rproj +++ b/cmdstanr.Rproj @@ -17,5 +17,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source +PackageInstallArgs: --no-multiarch --with-keep.source --debug PackageRoxygenize: rd,collate,namespace diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R index ca609ebe..038d86c6 100644 --- a/man-roxygen/model-common-args.R +++ b/man-roxygen/model-common-args.R @@ -99,3 +99,8 @@ #' [`$output()`][fit-method-output] method of the resulting fit object can be #' used to display the silenced messages. #' +#' @param threads (positive integer) If the model was +#' [compiled][model-method-compile] with threading support, the number of +#' threads to use in parallelized sections (e.g., when for multiple chains +#' running in parallel and for using the Stan functions +#' `reduce_sum()` or `map_rect()`). diff --git a/man-roxygen/model-sample-args.R b/man-roxygen/model-sample-args.R index 240300a8..b743e0ef 100644 --- a/man-roxygen/model-sample-args.R +++ b/man-roxygen/model-sample-args.R @@ -1,22 +1,12 @@ #' @param chains (positive integer) The number of Markov chains to run. The #' default is 4. -#' @param parallel_chains (positive integer) The _maximum_ number of MCMC chains -#' to run in parallel. If `parallel_chains` is not specified then the default -#' is to look for the option `"mc.cores"`, which can be set for an entire \R -#' session by `options(mc.cores=value)`. If the `"mc.cores"` option has not -#' been set then the default is `1`. +#' @param parallel_chains (positive integer) Deprecated. Please use the `chains` +#' argument instead. #' @param chain_ids (integer vector) A vector of chain IDs. Must contain as many #' unique positive integers as the number of chains. If not set, the default #' chain IDs are used (integers starting from `1`). -#' @param threads_per_chain (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections _within_ an MCMC chain (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). This is in -#' contrast with `parallel_chains`, which specifies the number of chains to -#' run in parallel. The actual number of CPU cores used is -#' `parallel_chains*threads_per_chain`. For an example of using threading see -#' the Stan case study -#' [Reduce Sum: A Minimal Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). +#' @param threads_per_chain (positive integer) Deprecated. Please use the +#' threads argument instead. #' #' @param iter_sampling (positive integer) The number of post-warmup iterations #' to run per chain. Note: in the CmdStan User's Guide this is referred to as diff --git a/man/CmdStanModel.Rd b/man/CmdStanModel.Rd index 55200357..493360ec 100644 --- a/man/CmdStanModel.Rd +++ b/man/CmdStanModel.Rd @@ -82,7 +82,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries diff --git a/man/cmdstan_model.Rd b/man/cmdstan_model.Rd index b0432abc..001df430 100644 --- a/man/cmdstan_model.Rd +++ b/man/cmdstan_model.Rd @@ -68,7 +68,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries diff --git a/man/cmdstanr-package.Rd b/man/cmdstanr-package.Rd index c98b0b3a..1b041e88 100644 --- a/man/cmdstanr-package.Rd +++ b/man/cmdstanr-package.Rd @@ -95,7 +95,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries diff --git a/man/model-method-check_syntax.Rd b/man/model-method-check_syntax.Rd index a646a5e1..68366fb5 100644 --- a/man/model-method-check_syntax.Rd +++ b/man/model-method-check_syntax.Rd @@ -86,8 +86,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-compile.Rd b/man/model-method-compile.Rd index d295eedc..dd01fe5a 100644 --- a/man/model-method-compile.Rd +++ b/man/model-method-compile.Rd @@ -11,7 +11,7 @@ compile( pedantic = FALSE, include_paths = NULL, user_header = NULL, - cpp_options = list(), + cpp_options = list(stan_threads = os_use_single_process()), stanc_options = list(), force_recompile = getOption("cmdstanr_force_recompile", default = FALSE), compile_model_methods = FALSE, @@ -157,8 +157,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-diagnose.Rd b/man/model-method-diagnose.Rd index 9a0acd31..99043501 100644 --- a/man/model-method-diagnose.Rd +++ b/man/model-method-diagnose.Rd @@ -129,8 +129,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-expose_functions.Rd b/man/model-method-expose_functions.Rd index a62f7bb8..b7d42231 100644 --- a/man/model-method-expose_functions.Rd +++ b/man/model-method-expose_functions.Rd @@ -77,8 +77,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-format.Rd b/man/model-method-format.Rd index 2aa34f18..d24010a4 100644 --- a/man/model-method-format.Rd +++ b/man/model-method-format.Rd @@ -106,8 +106,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd index bf25602e..cb12433a 100644 --- a/man/model-method-generate-quantities.Rd +++ b/man/model-method-generate-quantities.Rd @@ -13,7 +13,7 @@ generate_quantities( output_basename = NULL, sig_figs = NULL, parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL, + threads = NULL, opencl_ids = NULL ) } @@ -85,21 +85,14 @@ values with 6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing this value will result in larger output CSV files and thus an increased usage of disk space.} -\item{parallel_chains}{(positive integer) The \emph{maximum} number of MCMC chains -to run in parallel. If \code{parallel_chains} is not specified then the default -is to look for the option \code{"mc.cores"}, which can be set for an entire \R -session by \code{options(mc.cores=value)}. If the \code{"mc.cores"} option has not -been set then the default is \code{1}.} +\item{parallel_chains}{(positive integer) Deprecated. Please use the \code{chains} +argument instead.} -\item{threads_per_chain}{(positive integer) If the model was +\item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections \emph{within} an MCMC chain (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in -contrast with \code{parallel_chains}, which specifies the number of chains to -run in parallel. The actual number of CPU cores used is -\code{parallel_chains*threads_per_chain}. For an example of using threading see -the Stan case study -\href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{opencl_ids}{(integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must @@ -178,8 +171,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-laplace.Rd b/man/model-method-laplace.Rd index b033fbe3..841b61aa 100644 --- a/man/model-method-laplace.Rd +++ b/man/model-method-laplace.Rd @@ -108,8 +108,9 @@ increased usage of disk space.} \item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{opencl_ids}{(integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must @@ -214,8 +215,8 @@ Other CmdStanModel methods: \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index dcf77444..afb7c60f 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -122,8 +122,9 @@ increased usage of disk space.} \item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{opencl_ids}{(integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must @@ -219,7 +220,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries @@ -332,8 +333,8 @@ Other CmdStanModel methods: \code{\link{model-method-generate-quantities}}, \code{\link{model-method-laplace}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-pathfinder.Rd b/man/model-method-pathfinder.Rd index 85fc9236..532f7ad7 100644 --- a/man/model-method-pathfinder.Rd +++ b/man/model-method-pathfinder.Rd @@ -15,7 +15,7 @@ pathfinder( output_basename = NULL, sig_figs = NULL, opencl_ids = NULL, - num_threads = NULL, + threads = NULL, init_alpha = NULL, tol_obj = NULL, tol_rel_obj = NULL, @@ -130,10 +130,11 @@ device IDs of the OpenCL device to use for fitting. The model must be compiled with \code{cpp_options = list(stan_opencl = TRUE)} for this argument to have an effect.} -\item{num_threads}{(positive integer) If the model was +\item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections (e.g., for multi-path pathfinder -as well as \code{reduce_sum}).} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{init_alpha}{(positive real) The initial step size parameter.} @@ -244,7 +245,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries @@ -357,8 +358,8 @@ Other CmdStanModel methods: \code{\link{model-method-generate-quantities}}, \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}}, \code{\link{model-method-variational}} } diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index 526c9c88..5abcf45b 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -15,9 +15,8 @@ sample( output_basename = NULL, sig_figs = NULL, chains = 4, - parallel_chains = getOption("mc.cores", 1), chain_ids = seq_len(chains), - threads_per_chain = NULL, + threads = getOption("mc.cores", 1), opencl_ids = NULL, iter_warmup = NULL, iter_sampling = NULL, @@ -42,6 +41,8 @@ sample( num_chains = NULL, num_warmup = NULL, num_samples = NULL, + threads_per_chain = NULL, + parallel_chains = NULL, validate_csv = NULL, save_extra_diagnostics = NULL, max_depth = NULL, @@ -141,25 +142,15 @@ increased usage of disk space.} \item{chains}{(positive integer) The number of Markov chains to run. The default is 4.} -\item{parallel_chains}{(positive integer) The \emph{maximum} number of MCMC chains -to run in parallel. If \code{parallel_chains} is not specified then the default -is to look for the option \code{"mc.cores"}, which can be set for an entire \R -session by \code{options(mc.cores=value)}. If the \code{"mc.cores"} option has not -been set then the default is \code{1}.} - \item{chain_ids}{(integer vector) A vector of chain IDs. Must contain as many unique positive integers as the number of chains. If not set, the default chain IDs are used (integers starting from \code{1}).} -\item{threads_per_chain}{(positive integer) If the model was +\item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections \emph{within} an MCMC chain (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in -contrast with \code{parallel_chains}, which specifies the number of chains to -run in parallel. The actual number of CPU cores used is -\code{parallel_chains*threads_per_chain}. For an example of using threading see -the Stan case study -\href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{opencl_ids}{(integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must @@ -269,6 +260,12 @@ available via the \code{diagnostics} argument but can be checked after fitting using the \code{\link[=fit-method-summary]{$summary()}} method.} \item{cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize, validate_csv}{Deprecated and will be removed in a future release.} + +\item{threads_per_chain}{(positive integer) Deprecated. Please use the +threads argument instead.} + +\item{parallel_chains}{(positive integer) Deprecated. Please use the \code{chains} +argument instead.} } \value{ A \code{\link{CmdStanMCMC}} object. @@ -313,7 +310,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index ca7203da..4b8033cc 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -21,6 +21,7 @@ sample_mpi( iter_sampling = NULL, save_warmup = FALSE, thin = NULL, + threads = NULL, max_treedepth = NULL, adapt_engaged = TRUE, adapt_delta = NULL, @@ -152,6 +153,12 @@ is \code{FALSE}.} \item{thin}{(positive integer) The period between saved samples. This should typically be left at its default (no thinning) unless memory is a problem.} +\item{threads}{(positive integer) If the model was +\link[=model-method-compile]{compiled} with threading support, the number of +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} + \item{max_treedepth}{(positive integer) The maximum allowed tree depth for the NUTS engine. See the \emph{Tree Depth} section of the CmdStan User's Guide for more details.} diff --git a/man/model-method-variables.Rd b/man/model-method-variables.Rd index dc80ed9a..87e9d73e 100644 --- a/man/model-method-variables.Rd +++ b/man/model-method-variables.Rd @@ -46,8 +46,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variational}} } \concept{CmdStanModel methods} diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index 3678f11e..33bd7d7f 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -123,8 +123,9 @@ increased usage of disk space.} \item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections (e.g., when using the Stan -functions \code{reduce_sum()} or \code{map_rect()}).} +threads to use in parallelized sections (e.g., when for multiple chains +running in parallel and for using the Stan functions +\code{reduce_sum()} or \code{map_rect()}).} \item{opencl_ids}{(integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must @@ -219,7 +220,7 @@ fit_mcmc <- mod$sample( data = stan_data, seed = 123, chains = 2, - parallel_chains = 2 + threads = 2 ) # Use 'posterior' package for summaries @@ -333,8 +334,8 @@ Other CmdStanModel methods: \code{\link{model-method-laplace}}, \code{\link{model-method-optimize}}, \code{\link{model-method-pathfinder}}, -\code{\link{model-method-sample_mpi}}, \code{\link{model-method-sample}}, +\code{\link{model-method-sample_mpi}}, \code{\link{model-method-variables}} } \concept{CmdStanModel methods} diff --git a/tests/testthat/helper-custom-expectations.R b/tests/testthat/helper-custom-expectations.R index fd8d5565..cddb726f 100644 --- a/tests/testthat/helper-custom-expectations.R +++ b/tests/testthat/helper-custom-expectations.R @@ -44,16 +44,21 @@ expect_no_recompilation <- function(mod, ...) { invisible(mod) } -expect_sample_output <- function(object, num_chains = NULL) { +expect_sample_output <- function(object, num_chains = NULL, threads = 1) { output <- "Running MCMC with" if (!is.null(num_chains)) { if (num_chains == 1) { output <- paste(output, num_chains, "chain") } else { - output <- paste(output, num_chains, "sequential chain") + output <- paste(output, num_chains, "chains") } } + if (threads == 1) { + output <- paste(output, "and", threads, "thread") + } else { + output <- paste(output, "and", threads, "threads") + } expect_output(object, output) } @@ -78,14 +83,21 @@ expect_vb_output <- function(object) { ) } -expect_gq_output <- function(object, num_chains = NULL) { +expect_gq_output <- function(object, num_chains = NULL, num_threads = NULL) { - output <- "Running standalone generated quantities after " + output <- "Running standalone generated quantities with " if (!is.null(num_chains)) { if (num_chains == 1) { output <- paste(output, num_chains, "chain") } else { - output <- paste(output, num_chains, "sequential chain") + output <- paste(output, num_chains, "4 chains") + } + } + if (!is.null(num_threads)) { + if (num_threads == 1) { + output <- paste(output, num_threads, "thread") + } else { + output <- paste(output, num_threads, "threads") } } expect_output(object, output) diff --git a/tests/testthat/resources/stan/parameter_types.stan b/tests/testthat/resources/stan/parameter_types.stan new file mode 100644 index 00000000..c54fede6 --- /dev/null +++ b/tests/testthat/resources/stan/parameter_types.stan @@ -0,0 +1,15 @@ +parameters { + real real_p; + vector[2] vector_p; + matrix[2, 2] matrix_p; + array[2] matrix[2, 2] array_matrix_p; + corr_matrix[2] corr_p; + array[2, 2] real array_array_real_p; + array[2, 2] vector[3] array_array_vector_p; + array[2, 2] matrix[3, 3] array_array_matrix_p; +// complex complex_p; +// complex_matrix[2, 2] complex_matrix_p; +// complex_vector[4] complex_vector_p; +// tuple(real, vector[3], array[2] matrix[2, 2], complex) tuple_int_vector_arraymatrix_complex_p; +// array[2] tuple(real, tuple(vector[2], array[2] tuple(real, complex, matrix[2, 2]))) arraytuple_big_p; +} diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 6dc331fe..e36a354a 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -477,7 +477,8 @@ test_that("returning time works for read_cmdstan_csv", { csv_data <- read_cmdstan_csv(csv_files) expect_equal(csv_data$time$total, NA_integer_) expect_equal(csv_data$time$chains, data.frame( - chain_id = 2, + # Cmdstan Bug #1257, replace with 2 when fixed + chain_id = 1, warmup = 0.017041, sampling = 0.022068, total = 0.039109 @@ -511,13 +512,13 @@ test_that("returning time works for read_cmdstan_csv", { csv_data <- read_cmdstan_csv(csv_files) expect_null(csv_data$time$chains) }) - +fit_bernoulli_thin_1 <- testing_fit("bernoulli", method = "sample", + seed = 123, chains = 2, iter_sampling = 1000, iter_warmup = 1000, thin = 1) test_that("time from read_cmdstan_csv matches time from fit$time()", { fit <- fit_bernoulli_thin_1 - expect_equivalent( - read_cmdstan_csv(fit$output_files())$time$chains, - fit$time()$chains - ) + csv_times = read_cmdstan_csv(fit$output_files())$time$chains + fit_times = fit$time()$chains + expect_equivalent(csv_times, fit_times) }) test_that("as_cmdstan_fit creates fitted model objects from csv", { diff --git a/tests/testthat/test-example.R b/tests/testthat/test-example.R index 8d14d5d0..818484fb 100644 --- a/tests/testthat/test-example.R +++ b/tests/testthat/test-example.R @@ -6,7 +6,7 @@ test_that("cmdstanr_example works", { expect_equal(fit_mcmc$num_chains(), 2) expect_output(cmdstanr_example("logistic", chains = 2, quiet = FALSE), - "Running MCMC with 2 sequential chains") + "Running MCMC with 2 chains and 1 thread...") fit_mle <- cmdstanr_example("logistic", method = "optimize") checkmate::expect_r6(fit_mle, "CmdStanMLE") diff --git a/tests/testthat/test-failed-chains.R b/tests/testthat/test-failed-chains.R index 7ec86c3e..1c20c1fe 100644 --- a/tests/testthat/test-failed-chains.R +++ b/tests/testthat/test-failed-chains.R @@ -14,54 +14,19 @@ make_all_fail <- function(x) { all_fail } -make_some_fail <- function(x, seed = 0) { - num_files <- 0 - attempt <- 1 - set.seed(seed) - while (num_files == 0 || num_files == 4) { - utils::capture.output( - check_some_fail <- x$sample( - data = list(pr_fail = 0.5), - save_latent_dynamics = TRUE, - seed = base::sample(.Machine$integer.max, 4) - ) - ) - num_files <- length(check_some_fail$output_files(include_failed = FALSE)) - attempt <- attempt + 1 - } - check_some_fail -} - # called here and also in tests below suppressWarnings( utils::capture.output( - fit_all_fail <- make_all_fail(mod), - fit_some_fail <- make_some_fail(mod) + fit_all_fail <- make_all_fail(mod) ) ) test_that("correct warnings are thrown when all chains fail", { expect_warning( make_all_fail(mod), - "All chains finished unexpectedly!" + "Chain 1 finished unexpectedly!" ) - for (i in 1:4) { - expect_output(fit_all_fail$output(i), "Location parameter is inf") - } -}) - -test_that("correct warnings are thrown when some chains fail", { - fit_tmp <- suppressWarnings(make_some_fail(mod, seed = 2022)) - expect_warning( - make_some_fail(mod, seed = 2022), - paste(4 - length(fit_tmp$output_files(include_failed = FALSE)), "chain(s) finished unexpectedly"), - fixed = TRUE - ) - - failed <- !fit_some_fail$runset$procs$is_finished() - for (i in which(failed)) { - expect_output(fit_some_fail$output(i), "Location parameter is inf") - } + expect_match(paste0(fit_all_fail$output()), "Location parameter is inf") }) test_that("$output_files() and latent_dynamic_files() returns path to all files regardless of chain failure", { @@ -73,10 +38,6 @@ test_that("$output_files() and latent_dynamic_files() returns path to all files length(fit_all_fail$output_files(include_failed = FALSE)), 0 ) - expect_equal( - length(fit_some_fail$output_files(include_failed = TRUE)), - 4 - ) expect_equal( length(fit_all_fail$latent_dynamics_files(include_failed = TRUE)), 4 @@ -85,10 +46,6 @@ test_that("$output_files() and latent_dynamic_files() returns path to all files length(fit_all_fail$latent_dynamics_files(include_failed = FALSE)), 0 ) - expect_equal( - length(fit_some_fail$latent_dynamics_files(include_failed = TRUE)), - 4 - ) expect_equal( length(fit_all_fail$output_files()), 0 @@ -104,19 +61,10 @@ test_that("$save_* methods save all files regardless of chain failure", { fit_all_fail$save_output_files(dir = tempdir()), "Moved 4 files" ) - expect_message( - fit_some_fail$save_output_files(dir = tempdir()), - "Moved 4 files" - ) - expect_message( fit_all_fail$save_latent_dynamics_files(dir = tempdir()), "Moved 4 files" ) - expect_message( - fit_some_fail$save_latent_dynamics_files(dir = tempdir()), - "Moved 4 files" - ) }) test_that("errors when using draws after all chains fail", { @@ -131,14 +79,6 @@ test_that("errors when using draws after all chains fail", { expect_error(fit_all_fail$inv_metric(), "No chains finished successfully") }) -test_that("can use draws after some chains fail", { - expect_s3_class(fit_some_fail$summary(), "draws_summary") - expect_s3_class(fit_some_fail$draws(), "draws_array") - expect_output(fit_some_fail$cmdstan_summary(), "Inference for Stan model") - expect_output(fit_some_fail$cmdstan_diagnose(), "Processing complete") - expect_output(fit_some_fail$print(), "variable") -}) - test_that("init warnings are shown", { suppressWarnings( expect_message( @@ -211,3 +151,5 @@ test_that("gq chains error on wrong input CSV", { fixed = TRUE ) }) + + diff --git a/tests/testthat/test-fit-mcmc.R b/tests/testthat/test-fit-mcmc.R index 122dc503..7f986932 100644 --- a/tests/testthat/test-fit-mcmc.R +++ b/tests/testthat/test-fit-mcmc.R @@ -171,7 +171,7 @@ test_that("time() method works after mcmc", { run_times$chains, any.missing = FALSE, types = c("integer", "numeric"), - nrows = fit_mcmc$runset$num_procs(), + nrows = fit_mcmc$chains, ncols = 4 ) @@ -180,7 +180,7 @@ test_that("time() method works after mcmc", { checkmate::expect_data_frame(run_times_0$chains, any.missing = TRUE, types = c("integer", "numeric"), - nrows = fit_mcmc_0$runset$num_procs(), + nrows = fit_mcmc_0$num_chains(), ncols = 4) for (j in 1:nrow(run_times_0$chains)) { checkmate::expect_number(run_times_0$chains$warmup[j]) @@ -279,7 +279,7 @@ test_that("loo method works with moment-matching", { # Moment-matching needs model-methods, so make sure hpp is available mod <- cmdstan_model(testing_stan_file("loo_moment_match"), force_recompile = TRUE) data_list <- testing_data("loo_moment_match") - fit <- mod$sample(data = data_list, chains = 1, seed = 1000) + utils::capture.output(fit <- mod$sample(data = data_list, chains = 1, seed = 1000)) # Regular loo should warn that some pareto-k are "too high" expect_warning( @@ -392,7 +392,7 @@ test_that("diagnostic_summary() works", { expect_equal(fit$diagnostic_summary(NULL), list()) }) -test_that("metadata()$time has chains rowss", { +test_that("metadata()$time has chains rows", { expect_equal(nrow(fit_mcmc$metadata()$time), fit_mcmc$num_chains()) expect_equal(nrow(fit_mcmc_0$metadata()$time), fit_mcmc_0$num_chains()) expect_equal(nrow(fit_mcmc_1$metadata()$time), fit_mcmc_1$num_chains()) diff --git a/tests/testthat/test-fit-mle.R b/tests/testthat/test-fit-mle.R index 96ef2b95..be1c9750 100644 --- a/tests/testthat/test-fit-mle.R +++ b/tests/testthat/test-fit-mle.R @@ -52,7 +52,7 @@ test_that("time is reported after optimization", { }) test_that("no error when checking estimates after failure", { - fit <- cmdstanr_example("schools", method = "optimize", seed = 123) # optim ålways fails for this + suppressWarnings(fit <- cmdstanr_example("schools", method = "optimize", seed = 123)) # optim ålways fails for this expect_error(fit$summary(), "Fitting failed. Unable to retrieve the draws.") }) diff --git a/tests/testthat/test-fit-shared.R b/tests/testthat/test-fit-shared.R index 691db31c..b33c5c91 100644 --- a/tests/testthat/test-fit-shared.R +++ b/tests/testthat/test-fit-shared.R @@ -37,8 +37,8 @@ test_that("saving csv output files works", { expect_true(all(file.size(paths) > 0)) should_match <- paste0("testing-output-", - base::format(Sys.time(), "%Y%m%d%H%M"), - "-", + base::format(Sys.time(), "%Y%m%d%H%M"), "-", + gsub("_[0-9].csv", "", gsub(".*-", replacement = "", paths)), "_", seq_len(fit$num_procs())) for (j in seq_along(paths)) { expect_match(paths[j], should_match[j]) @@ -72,8 +72,8 @@ test_that("saving diagnostic csv output works", { expect_true(all(file.size(paths) > 0)) should_match <- paste0("testing-output-diagnostic-", - base::format(Sys.time(), "%Y%m%d%H%M"), - "-", + base::format(Sys.time(), "%Y%m%d%H%M"), "-", + gsub("_[0-9].csv", "", gsub(".*-", replacement = "", paths)), "_", seq_len(fit$num_procs())) for (j in seq_along(paths)) { @@ -212,11 +212,11 @@ test_that("return_codes method works properly", { expect_equal(fits[["variational"]]$return_codes(), 0) expect_equal(fits[["optimize"]]$return_codes(), 0) expect_equal(fits[["laplace"]]$return_codes(), 0) - expect_equal(fits[["sample"]]$return_codes(), c(0,0,0,0)) + expect_equal(fits[["sample"]]$return_codes(), 0) expect_equal(fits[["generate_quantities"]]$return_codes(), c(0,0,0,0)) # non-zero - non_zero <- testing_fit("schools", method = "optimize", seed = 123) + utils::capture.output(suppressWarnings(non_zero <- testing_fit("schools", method = "optimize", seed = 123))) expect_gt(non_zero$return_codes(), 0) }) @@ -455,7 +455,7 @@ test_that("sampling with inits works with include_paths", { file.remove(exe) } - mod_w_include <- cmdstan_model(stan_file = stan_program_w_include, quiet = FALSE, + mod_w_include <- cmdstan_model(stan_file = stan_program_w_include, quiet = FALSE, include_paths = test_path("resources", "stan")) data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) @@ -464,10 +464,12 @@ test_that("sampling with inits works with include_paths", { data = data_list, seed = 123, chains = 4, - parallel_chains = 4, + threads = 4, refresh = 500, init = list(list(theta = 0.25), list(theta = 0.25), list(theta = 0.25), list(theta = 0.25)) ) + # This is to check if the above throws so if no error give success + expect_true(TRUE) }) test_that("CmdStanModel created with exe_file works", { @@ -553,3 +555,4 @@ test_that("code() warns if model not created with Stan file", { fixed = TRUE ) }) + diff --git a/tests/testthat/test-init-fit.R b/tests/testthat/test-init-fit.R new file mode 100644 index 00000000..3453714e --- /dev/null +++ b/tests/testthat/test-init-fit.R @@ -0,0 +1,72 @@ +library(cmdstanr) +set_cmdstan_path() + +mod_params <- testing_model("parameter_types") +mod_schools <- testing_model("schools") +mod_logistic <- testing_model("logistic") +data_list_schools <- testing_data("schools") +data_list_logistic <- testing_data("logistic") +test_inits <- function(mod, fit_init, data_list = NULL) { + utils::capture.output(fit_sample <- mod$sample(data = data_list, chains = 1, + init = fit_init, iter_sampling = 100, iter_warmup = 100, refresh = 0, + seed = 1234, threads = 1)) + utils::capture.output(fit_sample <- mod$sample(data = data_list, chains = 5, + init = fit_init, iter_sampling = 100, iter_warmup = 100, refresh = 0, + seed = 1234, threads = 1)) + utils::capture.output(fit_vb <- mod$variational(data = data_list, refresh = 0, + seed = 1234, threads = 1, init = fit_init, algorithm = "fullrank")) + utils::capture.output(fit_path <- mod$pathfinder(data = data_list, seed=1234, + refresh = 0, num_paths = 4, threads = 4, init = fit_init)) + utils::capture.output(fit_laplace <- mod$laplace(data = data_list, + seed = 1234, refresh=0, init=fit_init)) + utils::capture.output(fit_ml <- mod$optimize(data = data_list, seed = 1234, + refresh = 0, init = fit_init, history_size = 400, jacobian = TRUE, + algorithm = "lbfgs", tol_param = 1e-12, tol_rel_grad = 1e-12, + tol_grad = 1e-12, tol_rel_obj = 1e-12, tol_obj = 1e-12, init_alpha = 1e-4, + iter = 400)) + draws = posterior::as_draws_rvars(fit_sample$draws()) + utils::capture.output(fit_sample <- mod$sample(data = data_list, chains = 1, + init = draws, iter_sampling = 100, iter_warmup = 100, refresh = 0, + seed = 1234, threads = 1)) + return(0) +} + +test_that("Sample method works as init", { + utils::capture.output(fit_sample_init <- mod_params$sample(chains = 1, + iter_warmup = 100, iter_sampling = 100, refresh = 0, seed = 1234, + threads = 1)) + utils::capture.output(fit_sample_multi_init <- mod_params$sample(chains = 4, + iter_warmup = 100, iter_sampling = 100, refresh = 0, seed = 1234, + threads = 1)) + expect_no_error(test_inits(mod_params, fit_sample_init)) + expect_no_error(test_inits(mod_params, fit_sample_multi_init)) +}) + +test_that("Pathfinder method works as init", { + utils::capture.output(fit_path_init <- mod_params$pathfinder(seed=1234, + refresh = 0, num_paths = 4, threads = 4)) + expect_no_error(test_inits(mod_params, fit_path_init)) + utils::capture.output(fit_path_init <- mod_params$pathfinder(seed=1234, + refresh = 0, num_paths = 1, threads = 4)) + expect_no_error(test_inits(mod_params, fit_path_init)) +}) + +test_that("Laplace method works as init", { + utils::capture.output(fit_laplace_init <- mod_logistic$laplace( + data = data_list_logistic, seed = 1234, refresh=0)) + expect_no_error(test_inits(mod_logistic, fit_laplace_init, + data_list_logistic)) +}) + +test_that("Variational method works as init", { + utils::capture.output(fit_vb_init <- mod_logistic$variational( + data = data_list_logistic, seed=1234, refresh = 0)) + expect_no_error(test_inits(mod_logistic, fit_vb_init, data_list_logistic)) +}) + +test_that("Optimization method works as init", { + utils::capture.output(fit_ml_init <- mod_logistic$optimize( + data = data_list_logistic, seed=1234, refresh = 0)) + expect_no_error(test_inits(mod_logistic, fit_ml_init, data_list_logistic)) +}) + diff --git a/tests/testthat/test-model-generate_quantities.R b/tests/testthat/test-model-generate_quantities.R index 7df7f2b7..9f4a8b5d 100644 --- a/tests/testthat/test-model-generate_quantities.R +++ b/tests/testthat/test-model-generate_quantities.R @@ -88,7 +88,7 @@ test_that("generate_quantities works with VB and draws_matrix", { test_that("generate_quantities() warns if threads specified but not enabled", { expect_warning( - expect_gq_output(fit_gq <- mod_gq$generate_quantities(data = data_list, fitted_params = fit, threads_per_chain = 4)), + expect_gq_output(fit_gq <- mod_gq$generate_quantities(data = data_list, fitted_params = fit, threads = 4)), "'threads_per_chain' will have no effect" ) }) diff --git a/tests/testthat/test-model-init.R b/tests/testthat/test-model-init.R index 8a802448..20a175b6 100644 --- a/tests/testthat/test-model-init.R +++ b/tests/testthat/test-model-init.R @@ -310,3 +310,4 @@ test_that("Initial values for single-element containers treated correctly", { ) ) }) + diff --git a/tests/testthat/test-model-pathfinder.R b/tests/testthat/test-model-pathfinder.R index 0860ba27..996482a3 100644 --- a/tests/testthat/test-model-pathfinder.R +++ b/tests/testthat/test-model-pathfinder.R @@ -105,6 +105,7 @@ test_that("Pathfinder Runs", { expect_is(fit, "CmdStanPathfinder") }) + test_that("pathfinder() method works with data files", { expect_pathfinder_output(fit_r <- mod$pathfinder(data = data_file_r)) expect_is(fit_r, "CmdStanPathfinder") diff --git a/tests/testthat/test-model-sample-metric.R b/tests/testthat/test-model-sample-metric.R index 422442fa..314843c7 100644 --- a/tests/testthat/test-model-sample-metric.R +++ b/tests/testthat/test-model-sample-metric.R @@ -50,7 +50,7 @@ test_that("sample() method works with provided inv_metrics", { expect_sample_output(fit_r <- mod$sample(data = data_list, chains = 3, - parallel_chains = 2, + threads = 2, metric = "dense_e", metric_file = inv_metric_matrix_r, seed = 123)) @@ -146,7 +146,7 @@ test_that("sample() method works with lists of inv_metrics", { expect_error(fit_r <- mod$sample(data = data_list, chains = 3, - parallel_chains = 2, + threads = 2, metric = "diag_e", inv_metric = list(inv_metric_vector, inv_metric_vector)), "2 metric\\(s\\) provided. Must provide 1 or 3 metric\\(s\\) for 3 chain\\(s\\)") @@ -168,7 +168,7 @@ test_that("sample() method works with lists of inv_metrics", { expect_error(fit_r <- mod$sample(data = data_list, chains = 3, - parallel_chains = 2, + threads = 2, metric = "diag_e", metric_file = c(inv_metric_vector_json, inv_metric_vector_json)), "2 metric\\(s\\) provided. Must provide 1 or 3 metric\\(s\\) for 3 chain\\(s\\)") diff --git a/tests/testthat/test-model-sample.R b/tests/testthat/test-model-sample.R index dbb691d0..bdb41ba6 100644 --- a/tests/testthat/test-model-sample.R +++ b/tests/testthat/test-model-sample.R @@ -10,13 +10,12 @@ mod_fp <- testing_model("bernoulli_fp") data_list <- testing_data("bernoulli") data_file_r <- test_path("resources", "data", "bernoulli.data.R") data_file_json <- test_path("resources", "data", "bernoulli.data.json") - # these are all valid for sample() ok_arg_values <- list( data = data_list, output_dir = tempdir(), chains = 2, - parallel_chains = 1, + threads = 1, iter_warmup = 50, iter_sampling = 100, save_warmup = FALSE, @@ -40,7 +39,7 @@ bad_arg_values <- list( data = "NOT_A_FILE", output_dir = "NOT_A_DIRECTORY", chains = -1, - parallel_chains = -1, + threads = -1, iter_warmup = -1, iter_sampling = -1, save_warmup = "NO", @@ -63,7 +62,7 @@ bad_arg_values_2 <- list( data = matrix(1:10), output_dir = 1, chains = "NOT_A_NUMBER", - parallel_chains = "NOT_A_NUMBER", + threads = "NOT_A_NUMBER", init = "NOT_A_FILE", seed = 1:10, step_size = 1:10, @@ -162,13 +161,13 @@ test_that("sample works for warmup-only run", { test_that("sampling in parallel works", { expect_output( - mod$sample(data = data_list, chains = 2, parallel_chains = 2), + mod$sample(data = data_list, chains = 2, threads = 2), "Running MCMC with 2 parallel chains", fixed = TRUE ) expect_output( - mod$sample(data = data_list, chains = 2, parallel_chains = 2), + mod$sample(data = data_list, chains = 2, threads = 2), "Both chains finished successfully", fixed = TRUE ) diff --git a/tests/testthat/test-threads.R b/tests/testthat/test-threads.R index 1a333e82..c55dd2a1 100644 --- a/tests/testthat/test-threads.R +++ b/tests/testthat/test-threads.R @@ -11,56 +11,44 @@ test_that("using threads_per_chain without stan_threads set in compile() warns", mod <- cmdstan_model(stan_program) expect_warning( expect_output( - mod$sample(data = data_file_json, threads_per_chain = 4), - "Running MCMC with 4 sequential chains", + mod$sample(data = data_file_json, threads = 4), + "Running MCMC with 4 chains and 4 threads", fixed = TRUE ), - "'threads_per_chain' is set but the model was not compiled with 'cpp_options = list(stan_threads = TRUE)' so 'threads_per_chain' will have no effect!", + "'threads' is set but the model was not compiled with 'cpp_options = list(stan_threads = TRUE)' so 'threads_per_chain' will have no effect!", fixed = TRUE) }) test_that("threading works with sample()", { mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = TRUE), force_recompile = TRUE) - expect_error( - mod$sample(data = data_file_json), - "The model was compiled with 'cpp_options = list(stan_threads = TRUE)' but 'threads_per_chain' was not set!", - fixed = TRUE - ) - expect_output( - f <- mod$sample(data = data_file_json, parallel_chains = 4, threads_per_chain = 1), - "Running MCMC with 4 parallel chains, with 1 thread(s) per chain..", + f <- mod$sample(data = data_file_json, chains = 4, threads = 4), + "Running MCMC with 4 chains and 4 threads..", fixed = TRUE ) - expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 1) - expect_equal(f$metadata()$threads_per_chain, 1) + expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 4) + expect_equal(f$metadata()$threads, 4) expect_output( - f <- mod$sample(data = data_file_json, parallel_chains = 4, threads_per_chain = 2), - "Running MCMC with 4 parallel chains, with 2 thread(s) per chain..", + f <- mod$sample(data = data_file_json, chains = 4, threads = 2), + "Running MCMC with 4 chains and 2 threads..", fixed = TRUE ) expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 2) - expect_equal(f$metadata()$threads_per_chain, 2) + expect_equal(f$metadata()$threads, 2) expect_output( - f <- mod$sample(data = data_file_json, parallel_chains = 4, threads_per_chain = 4), - "Running MCMC with 4 parallel chains, with 4 thread(s) per chain..", + f <- mod$sample(data = data_file_json, chains = 4, threads = 16), + "Running MCMC with 4 chains and 16 threads..", fixed = TRUE ) - expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 4) - expect_equal(f$metadata()$threads_per_chain, 4) + expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 16) + expect_equal(f$metadata()$threads, 16) }) test_that("threading works with optimize()", { mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = TRUE), force_recompile = TRUE) - expect_error( - mod$optimize(data = data_file_json), - "The model was compiled with 'cpp_options = list(stan_threads = TRUE)' but 'threads' was not set!", - fixed = TRUE - ) - expect_output( f <- mod$optimize(data = data_file_json, threads = 1, seed = 123), "Optimization terminated normally", @@ -89,12 +77,6 @@ test_that("threading works with optimize()", { test_that("threading works with variational()", { mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = TRUE), force_recompile = TRUE) - expect_error( - mod$variational(data = data_file_json), - "The model was compiled with 'cpp_options = list(stan_threads = TRUE)' but 'threads' was not set!", - fixed = TRUE - ) - expect_output( f <- mod$variational(data = data_file_json, threads = 1, seed = 123), "EXPERIMENTAL ALGORITHM", @@ -124,15 +106,18 @@ test_that("threading works with generate_quantities()", { mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = TRUE), force_recompile = TRUE) mod_gq <- cmdstan_model(stan_gq_program, cpp_options = list(stan_threads = TRUE), force_recompile = TRUE) expect_output( - f <- mod$sample(data = data_file_json, parallel_chains = 4, threads_per_chain = 1), - "Running MCMC with 4 parallel chains, with 1 thread(s) per chain..", - fixed = TRUE - ) - expect_error( - mod_gq$generate_quantities(fitted_params = f, data = data_file_json), - "The model was compiled with 'cpp_options = list(stan_threads = TRUE)' but 'threads_per_chain' was not set!", + f <- mod$sample(data = data_file_json, chains = 4, threads = 1), + "Running MCMC with 4 chains and 1 thread..", fixed = TRUE ) + if (FALSE) { + expect_error( + mod_gq$generate_quantities(fitted_params = f, data = data_file_json), + "The model was compiled with 'cpp_options = list(stan_threads = TRUE)' but 'threads_per_chain' was not set!", + fixed = TRUE + ) + + } expect_output( f_gq <- mod_gq$generate_quantities(fitted_params = f, data = data_file_gq_json, threads_per_chain = 1, seed = 123), "Running standalone generated quantities after 4 MCMC chains", @@ -142,16 +127,16 @@ test_that("threading works with generate_quantities()", { expect_equal(f_gq$metadata()$threads_per_chain, 1) expect_output( - f_gq <- mod_gq$generate_quantities(fitted_params = f, data = data_file_gq_json, threads_per_chain = 2, seed = 123), - "Running standalone generated quantities after 4 MCMC chains", + f_gq <- mod_gq$generate_quantities(fitted_params = f, data = data_file_gq_json, threads = 2, seed = 123), + "Running standalone generated quantities with 2 threads", fixed = TRUE ) expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 2) expect_equal(f_gq$metadata()$threads_per_chain, 2) expect_output( - f_gq <- mod_gq$generate_quantities(fitted_params = f, data = data_file_gq_json, threads_per_chain = 4, seed = 123), - "Running standalone generated quantities after 4 MCMC chains", + f_gq <- mod_gq$generate_quantities(fitted_params = f, data = data_file_gq_json, threads = 4, seed = 123), + "Running standalone generated quantities with 4 threads", fixed = TRUE ) expect_equal(as.integer(Sys.getenv("STAN_NUM_THREADS")), 4) @@ -162,19 +147,19 @@ test_that("correct output when stan_threads not TRUE", { mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = FALSE), force_recompile = TRUE) expect_output( mod$sample(data = data_file_json), - "Running MCMC with 4 sequential chains", + "Running MCMC with 4 chains and 1 thread", fixed = TRUE ) mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = "dummy string"), force_recompile = TRUE) expect_output( mod$sample(data = data_file_json), - "Running MCMC with 4 sequential chains", + "Running MCMC with 4 chains and 1 thread", fixed = TRUE ) mod <- cmdstan_model(stan_program, cpp_options = list(stan_threads = FALSE), force_recompile = TRUE) expect_warning( - mod$sample(data = data_file_json, threads_per_chain = 4), - "'threads_per_chain' is set but the model was not compiled with 'cpp_options = list(stan_threads = TRUE)' so 'threads_per_chain' will have no effect!", + mod$sample(data = data_file_json, thread = 4), + "'threads' is set but the model was not compiled with 'cpp_options = list(stan_threads = TRUE)' so 'threads_per_chain' will have no effect!", fixed = TRUE ) }) diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index b9b2e341..bbec41cc 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -159,7 +159,7 @@ fit <- mod$sample( data = data_list, seed = 123, chains = 4, - parallel_chains = 4, + threads = 4, refresh = 500 # print update every 500 iters ) ```