From 91bf70178444972071b09b6914ec7e925603010a Mon Sep 17 00:00:00 2001 From: Jouni Helske Date: Mon, 7 Oct 2024 15:43:41 +0300 Subject: [PATCH] faster simulation of nhmms, document bootstrap, add progress bar --- DESCRIPTION | 1 + R/RcppExports.R | 16 ++++ R/bootstrap.R | 26 +++++- R/simulate_mnhmm.R | 120 ++++++++++----------------- R/simulate_nhmm.R | 90 +++++++++----------- man/bootstrap.Rd | 41 ++++++++++ src/RcppExports.cpp | 74 +++++++++++++++++ src/simulate_nhmm.cpp | 185 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 419 insertions(+), 134 deletions(-) create mode 100644 man/bootstrap.Rd create mode 100644 src/simulate_nhmm.cpp diff --git a/DESCRIPTION b/DESCRIPTION index 4f1c2013..8e488cef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,6 +51,7 @@ Imports: numDeriv, patchwork, Rcpp (>= 0.12.0), + RcppHungarian, rlang, stats, TraMineR (>= 2.2-7), diff --git a/R/RcppExports.R b/R/RcppExports.R index 337c84b3..64589904 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -197,6 +197,22 @@ objectivex <- function(transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, .Call(`_seqHMM_objectivex`, transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, coef, X, numberOfStates, threads) } +simulate_nhmm_singlechannel <- function(eta_pi, X_i, eta_A, X_s, eta_B, X_o) { + .Call(`_seqHMM_simulate_nhmm_singlechannel`, eta_pi, X_i, eta_A, X_s, eta_B, X_o) +} + +simulate_nhmm_multichannel <- function(eta_pi, X_i, eta_A, X_s, eta_B, X_o, M) { + .Call(`_seqHMM_simulate_nhmm_multichannel`, eta_pi, X_i, eta_A, X_s, eta_B, X_o, M) +} + +simulate_mnhmm_singlechannel <- function(eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d) { + .Call(`_seqHMM_simulate_mnhmm_singlechannel`, eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d) +} + +simulate_mnhmm_multichannel <- function(eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d, M) { + .Call(`_seqHMM_simulate_mnhmm_multichannel`, eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d, M) +} + softmax <- function(x) { .Call(`_seqHMM_softmax`, x) } diff --git a/R/bootstrap.R b/R/bootstrap.R index 9df4feca..5c26c466 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -43,6 +43,18 @@ permute_states <- function(gammas_boot, gammas_mle) { } gammas_boot } +#' Bootstrap Sampling of NHMM Coefficients +#' +#' @param model An `nhmm` or `mnhmm` object. +#' @param B number of bootstrap samples. +#' @param method Either `"nonparametric"` or `"parametric"`, to define whether +#' nonparametric or parametric bootstrap should be used. The former samples +#' sequences with replacement, whereas the latter simulates new datasets based +#' on the model. +#' @param A penalty term for model estimation. By default, same penalty is used +#' as was in model estimation by `estimate_nhmm` or `estimate_mnhmm`. +#' @param verbose Should the progress bar be displayed? Default is `FALSE`. +#' @rdname bootstrap #' @export bootstrap_coefs.nhmm <- function(model, B = 1000, method = c("nonparametric", "parametric"), @@ -59,12 +71,13 @@ bootstrap_coefs.nhmm <- function(model, B = 1000, gammas_mle <- model$gammas coefs <- matrix(NA, length(unlist(gammas_mle)), B) + pb <- utils::txtProgressBar(min = 0, max = 100, style = 3) if (method == "nonparametric") { for (i in seq_len(B)) { mod <- bootstrap_model(model) fit <- fit_nhmm(mod, init, 0, 0, 1, penalty, ...) coefs[, i] <- unlist(permute_states(fit$gammas, gammas_mle)) - if(verbose) print(paste0("Bootstrap replication ", i, " complete.")) + if (verbose) setTxtProgressBar(pb, i) } } else { N <- model$n_sequences @@ -83,11 +96,14 @@ bootstrap_coefs.nhmm <- function(model, B = 1000, data = d, time, id, init)$model fit <- fit_nhmm(mod, init, 0, 0, 1, penalty, ...) coefs[, i] <- unlist(permute_states(fit$gammas, gammas_mle)) - print(paste0("Bootstrap replication ", i, " complete.")) + if (verbose) setTxtProgressBar(pb, i) } } + close(pb) return(coefs) } +#' @inheritParams bootstrap_coefs.nhmm +#' @rdname bootstrap #' @export bootstrap_coefs.mnhmm <- function(model, B = 1000, method = c("nonparametric", "parametric"), @@ -101,13 +117,14 @@ bootstrap_coefs.mnhmm <- function(model, B = 1000, if (missing(penalty)) { penalty <- model$estimation_results$penalty } + pb <- utils::txtProgressBar(min = 0, max = 100, style = 3) if (method == "nonparametric") { coefs <- matrix(NA, length(unlist(init)), B) for (i in seq_len(B)) { mod <- bootstrap_model(model) fit <- fit_mnhmm(mod, init, 0, 0, 1, penalty, FALSE) coefs[, i] <- unlist(fit$coefficients) - print(paste0("Bootstrap replication ", i, " complete.")) + if (verbose) setTxtProgressBar(pb, i) } } else { coefs <- matrix(NA, length(unlist(init)), B) @@ -129,8 +146,9 @@ bootstrap_coefs.mnhmm <- function(model, B = 1000, data = d, time, id, init)$model fit <- fit_mnhmm(mod, init, 0, 0, 1, penalty, FALSE) coefs[, i] <- unlist(fit$coefficients) - print(paste0("Bootstrap replication ", i, " complete.")) + if (verbose) setTxtProgressBar(pb, i) } } + close(pb) return(coefs) } diff --git a/R/simulate_mnhmm.R b/R/simulate_mnhmm.R index ca4c5d0f..298aad5e 100644 --- a/R/simulate_mnhmm.R +++ b/R/simulate_mnhmm.R @@ -77,99 +77,63 @@ simulate_mnhmm <- function( if (is.null(coefs$emission_probs)) coefs$emission_probs <- NULL if (is.null(coefs$cluster_probs)) coefs$cluster_probs <- NULL } - K_i <- nrow(model$X_initial) - K_s <- nrow(model$X_transition) - K_o <- nrow(model$X_emission) - K_d <- nrow(model$X_cluster) model$etas <- create_initial_values( - coefs, n_states, n_symbols, init_sd, K_i, K_s, K_o, K_d, n_clusters + coefs, model$n_states, model$n_symbols, init_sd, nrow(model$X_initial), + nrow(model$X_transition), nrow(model$X_emission), nrow(model$X_cluster), + n_clusters ) - model$gammas$pi <- c(eta_to_gamma_mat_field( - model$etas$pi - )) - model$gammas$A <- c(eta_to_gamma_cube_field( - model$etas$A - )) if (n_channels == 1L) { - model$gammas$B <- c(eta_to_gamma_cube_field( - model$etas$B - )) + out <- simulate_mnhmm_singlechannel( + model$etas$pi, model$X_initial, + model$etas$A, model$X_transition, + model$etas$B, model$X_emission, + model$etas$omega, model$X_cluster + ) } else { - l <- lengths(model$etas$B) - gamma_B <- c(eta_to_gamma_cube_field(unlist(model$etas$B, recursive = FALSE))) - model$gammas$B <- split(gamma_B, rep(seq_along(l), l)) - } - model$gammas$omega <- eta_to_gamma_mat( - model$etas$omega - ) - probs <- get_probs(model) - states <- array(NA_character_, c(max(sequence_lengths), n_sequences)) - obs <- array(NA_character_, c(max(sequence_lengths), n_channels, n_sequences)) - ids <- unique(data[[id]]) - times <- sort(unique(data[[time]])) - clusters <- character(n_sequences) - cluster_names <- model$cluster_names - state_names <- paste0( - rep(cluster_names, each = model$n_states), ": ", unlist(model$state_names) - ) - for (i in seq_len(n_sequences)) { - p_cluster <- probs$cluster[ - probs$cluster[[time]] == time[1] & probs$cluster[[id]] == ids[i], - "probability" - ] - clusters[i] <- sample(model$cluster_names, 1, prob = p_cluster) - p_init <- probs$initial[ - probs$initial[[time]] == time[1] & probs$initial[[id]] == ids[i] & - probs$initial$cluster == clusters[i], - "probability" - ] - states[1, i] <- sample(state_names, 1, prob = p_init) - for (k in seq_len(n_channels)) { - p_emission <- probs$emission[ - probs$emission[[time]] == time[1] & probs$emission[[id]] == ids[i] & - probs$emission$cluster == clusters[i] & - probs$emission$state == states[1, i] & probs$emission$channel == k, - "probability" - ] - obs[1, k, i] <- sample(symbol_names[[k]], 1, prob = p_emission) - } + out <- simulate_mnhmm_multichannel( + model$etas$pi, model$X_initial, + model$etas$A, model$X_transition, + unlist(model$etas$B, recursive = FALSE), model$X_emission, + model$etas$omega, model$X_cluster, + model$n_symbols + ) } - - for (i in seq_len(n_sequences)) { - for (t in 2:sequence_lengths[i]) { - p_transition <- probs$transition[ - probs$transition[[time]] == times[t] & probs$transition[[id]] == ids[i] & - probs$transition$cluster == clusters[i] & - probs$transition$state_from == states[t - 1, i], "probability" - ] - states[t, i] <- sample(state_names, 1, prob = p_transition) - for (k in seq_len(n_channels)) { - p_emission <- probs$emission[ - probs$emission[[time]] == time[t] & probs$emission[[id]] == ids[i] & - probs$emission$cluster == clusters[i] & - probs$emission$state == states[t, i] & probs$emission$channel == k, - "probability" - ] - obs[t, k, i] <- sample(symbol_names[[k]], 1, prob = p_emission) - } + T_ <- model$length_of_sequences + for (i in seq_len(model$n_sequences)) { + Ti <- sequence_lengths[i] + if (Ti < T_) { + out$states[(Ti + 1):T_, i] <- NA + out$observations[(Ti + 1):T_, i] <- NA } } + state_names <- paste0( + rep(model$cluster_names, each = model$n_states), + ": ", unlist(model$state_names) + ) + symbol_names <- model$symbol_names + out$states[] <- state_names[c(out$states) + 1] states <- suppressWarnings(suppressMessages( seqdef( matrix( - t(states), + t(out$states), n_sequences, max(sequence_lengths) ), alphabet = state_names ) )) - obs <- lapply(seq_len(n_channels), function(i) { - suppressWarnings(suppressMessages( - seqdef(t(obs[, i, ]), alphabet = symbol_names[[i]]) + if (n_channels == 1) { + out$observations[] <- symbol_names[c(out$observations) + 1] + model$observations <- suppressWarnings(suppressMessages( + seqdef(t(out$observations), alphabet = symbol_names) )) - }) - names(obs) <- model$channel_names - if (n_channels == 1) obs <- obs[[1]] - model$observations <- obs + } else { + model$observations <- lapply(seq_len(n_channels), function(i) { + out$observations[, , i] <- symbol_names[[i]][c(out$observations[, , i]) + 1] + suppressWarnings(suppressMessages( + seqdef(t(out$observations[, , i]), alphabet = symbol_names[[i]]) + )) + }) + names(model$observations) <- model$channel_names + } list(model = model, states = states) } diff --git a/R/simulate_nhmm.R b/R/simulate_nhmm.R index 2e2817da..cf726f79 100644 --- a/R/simulate_nhmm.R +++ b/R/simulate_nhmm.R @@ -69,71 +69,57 @@ simulate_nhmm <- function( if (is.null(coefs$transition_probs)) coefs$transition_probs <- NULL if (is.null(coefs$emission_probs)) coefs$emission_probs <- NULL } - K_i <- nrow(model$X_initial) - K_s <- nrow(model$X_transition) - K_o <- nrow(model$X_emission) - model$gammas$pi <- eta_to_gamma_mat(model$etas$pi) - model$gammas$A <- eta_to_gamma_cube(model$etas$A) + model$etas <- create_initial_values( + coefs, model$n_states, model$n_symbols, init_sd, nrow(model$X_initial), + nrow(model$X_transition), nrow(model$X_emission) + ) if (n_channels == 1L) { - model$gammas$B <- eta_to_gamma_cube(model$etas$B) + out <- simulate_nhmm_singlechannel( + model$etas$pi, model$X_initial, + model$etas$A, model$X_transition, + model$etas$B, model$X_emission + ) } else { - model$gammas$B <- eta_to_gamma_cube_field(model$etas$B) - } - probs <- get_probs(model) - states <- array(NA_character_, c(max(sequence_lengths), n_sequences)) - obs <- array(NA_character_, c(max(sequence_lengths), n_channels, n_sequences)) - ids <- unique(data[[id]]) - times <- sort(unique(data[[time]])) - state_names <- model$state_names - for (i in seq_len(n_sequences)) { - p_init <- probs$initial[ - probs$initial[[time]] == time[1] & probs$initial[[id]] == ids[i], - "probability" - ] - states[1, i] <- sample(state_names, 1, prob = p_init) - for (k in seq_len(n_channels)) { - p_emission <- probs$emission[ - probs$emission[[time]] == time[1] & probs$emission[[id]] == ids[i] & - probs$emission$state == states[1, i] & probs$emission$channel == k, - "probability" - ] - obs[1, k, i] <- sample(symbol_names[[k]], 1, prob = p_emission) - } + out <- simulate_nhmm_multichannel( + model$etas$pi, model$X_initial, + model$etas$A, model$X_transition, + model$etas$B, model$X_emission, + model$n_symbols + ) } - - for (i in seq_len(n_sequences)) { - for (t in 2:sequence_lengths[i]) { - p_transition <- probs$transition[ - probs$transition[[time]] == times[t] & probs$transition[[id]] == ids[i] & - probs$transition$state_from == states[t - 1, i], "probability" - ] - states[t, i] <- sample(state_names, 1, prob = p_transition) - for (k in seq_len(n_channels)) { - p_emission <- probs$emission[ - probs$emission[[time]] == time[t] & probs$emission[[id]] == ids[i] & - probs$emission$state == states[t, i] & probs$emission$channel == k, - "probability" - ] - obs[t, k, i] <- sample(symbol_names[[k]], 1, prob = p_emission) - } + T_ <- model$length_of_sequences + for (i in seq_len(model$n_sequences)) { + Ti <- sequence_lengths[i] + if (Ti < T_) { + out$states[(Ti + 1):T_, i] <- NA + out$observations[(Ti + 1):T_, i] <- NA } } + state_names <- model$state_names + symbol_names <- model$symbol_names + out$states[] <- state_names[c(out$states) + 1] states <- suppressWarnings(suppressMessages( seqdef( matrix( - t(states), + t(out$states), n_sequences, max(sequence_lengths) ), alphabet = state_names ) )) - obs <- lapply(seq_len(n_channels), function(i) { - suppressWarnings(suppressMessages( - seqdef(t(obs[, i, ]), alphabet = symbol_names[[i]]) + if (n_channels == 1) { + out$observations[] <- symbol_names[c(out$observations) + 1] + model$observations <- suppressWarnings(suppressMessages( + seqdef(t(out$observations), alphabet = symbol_names) )) - }) - names(obs) <- model$channel_names - if (n_channels == 1) obs <- obs[[1]] - model$observations <- obs + } else { + model$observations <- lapply(seq_len(n_channels), function(i) { + out$observations[, , i] <- symbol_names[[i]][c(out$observations[, , i]) + 1] + suppressWarnings(suppressMessages( + seqdef(t(out$observations[, , i]), alphabet = symbol_names[[i]]) + )) + }) + names(model$observations) <- model$channel_names + } list(model = model, states = states) } diff --git a/man/bootstrap.Rd b/man/bootstrap.Rd new file mode 100644 index 00000000..b2770cae --- /dev/null +++ b/man/bootstrap.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bootstrap.R +\name{bootstrap_coefs.nhmm} +\alias{bootstrap_coefs.nhmm} +\alias{bootstrap_coefs.mnhmm} +\title{Bootstrap Sampling of NHMM Coefficients} +\usage{ +bootstrap_coefs.nhmm( + model, + B = 1000, + method = c("nonparametric", "parametric"), + penalty, + verbose = FALSE, + ... +) + +bootstrap_coefs.mnhmm( + model, + B = 1000, + method = c("nonparametric", "parametric"), + verbose = FALSE +) +} +\arguments{ +\item{model}{An \code{nhmm} or \code{mnhmm} object.} + +\item{B}{number of bootstrap samples.} + +\item{method}{Either \code{"nonparametric"} or \code{"parametric"}, to define whether +nonparametric or parametric bootstrap should be used. The former samples +sequences with replacement, whereas the latter simulates new datasets based +on the model.} + +\item{verbose}{Should the progress bar be displayed? Default is \code{FALSE}.} + +\item{A}{penalty term for model estimation. By default, same penalty is used +as was in model estimation by \code{estimate_nhmm} or \code{estimate_mnhmm}.} +} +\description{ +Bootstrap Sampling of NHMM Coefficients +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 28d5f2c1..92482580 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -820,6 +820,76 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// simulate_nhmm_singlechannel +Rcpp::List simulate_nhmm_singlechannel(const arma::mat& eta_pi, const arma::mat& X_i, const arma::cube& eta_A, const arma::cube& X_s, const arma::cube& eta_B, const arma::cube& X_o); +RcppExport SEXP _seqHMM_simulate_nhmm_singlechannel(SEXP eta_piSEXP, SEXP X_iSEXP, SEXP eta_ASEXP, SEXP X_sSEXP, SEXP eta_BSEXP, SEXP X_oSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type eta_pi(eta_piSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_i(X_iSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type eta_A(eta_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_s(X_sSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type eta_B(eta_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_o(X_oSEXP); + rcpp_result_gen = Rcpp::wrap(simulate_nhmm_singlechannel(eta_pi, X_i, eta_A, X_s, eta_B, X_o)); + return rcpp_result_gen; +END_RCPP +} +// simulate_nhmm_multichannel +Rcpp::List simulate_nhmm_multichannel(const arma::mat& eta_pi, const arma::mat& X_i, const arma::cube& eta_A, const arma::cube& X_s, const arma::field& eta_B, const arma::cube& X_o, const arma::uvec& M); +RcppExport SEXP _seqHMM_simulate_nhmm_multichannel(SEXP eta_piSEXP, SEXP X_iSEXP, SEXP eta_ASEXP, SEXP X_sSEXP, SEXP eta_BSEXP, SEXP X_oSEXP, SEXP MSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type eta_pi(eta_piSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_i(X_iSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type eta_A(eta_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_s(X_sSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type eta_B(eta_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_o(X_oSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type M(MSEXP); + rcpp_result_gen = Rcpp::wrap(simulate_nhmm_multichannel(eta_pi, X_i, eta_A, X_s, eta_B, X_o, M)); + return rcpp_result_gen; +END_RCPP +} +// simulate_mnhmm_singlechannel +Rcpp::List simulate_mnhmm_singlechannel(const arma::field& eta_pi, const arma::mat& X_i, const arma::field& eta_A, const arma::cube& X_s, const arma::field& eta_B, const arma::cube& X_o, const arma::mat& eta_omega, const arma::mat& X_d); +RcppExport SEXP _seqHMM_simulate_mnhmm_singlechannel(SEXP eta_piSEXP, SEXP X_iSEXP, SEXP eta_ASEXP, SEXP X_sSEXP, SEXP eta_BSEXP, SEXP X_oSEXP, SEXP eta_omegaSEXP, SEXP X_dSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::field& >::type eta_pi(eta_piSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_i(X_iSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type eta_A(eta_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_s(X_sSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type eta_B(eta_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_o(X_oSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type eta_omega(eta_omegaSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_d(X_dSEXP); + rcpp_result_gen = Rcpp::wrap(simulate_mnhmm_singlechannel(eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d)); + return rcpp_result_gen; +END_RCPP +} +// simulate_mnhmm_multichannel +Rcpp::List simulate_mnhmm_multichannel(const arma::field& eta_pi, const arma::mat& X_i, const arma::field& eta_A, const arma::cube& X_s, const arma::field& eta_B, const arma::cube& X_o, const arma::mat& eta_omega, const arma::mat& X_d, const arma::uvec& M); +RcppExport SEXP _seqHMM_simulate_mnhmm_multichannel(SEXP eta_piSEXP, SEXP X_iSEXP, SEXP eta_ASEXP, SEXP X_sSEXP, SEXP eta_BSEXP, SEXP X_oSEXP, SEXP eta_omegaSEXP, SEXP X_dSEXP, SEXP MSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::field& >::type eta_pi(eta_piSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_i(X_iSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type eta_A(eta_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_s(X_sSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type eta_B(eta_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_o(X_oSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type eta_omega(eta_omegaSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_d(X_dSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type M(MSEXP); + rcpp_result_gen = Rcpp::wrap(simulate_mnhmm_multichannel(eta_pi, X_i, eta_A, X_s, eta_B, X_o, eta_omega, X_d, M)); + return rcpp_result_gen; +END_RCPP +} // softmax arma::vec softmax(const arma::vec& x); RcppExport SEXP _seqHMM_softmax(SEXP xSEXP) { @@ -999,6 +1069,10 @@ static const R_CallMethodDef CallEntries[] = { {"_seqHMM_log_objectivex", (DL_FUNC) &_seqHMM_log_objectivex, 12}, {"_seqHMM_objective", (DL_FUNC) &_seqHMM_objective, 9}, {"_seqHMM_objectivex", (DL_FUNC) &_seqHMM_objectivex, 12}, + {"_seqHMM_simulate_nhmm_singlechannel", (DL_FUNC) &_seqHMM_simulate_nhmm_singlechannel, 6}, + {"_seqHMM_simulate_nhmm_multichannel", (DL_FUNC) &_seqHMM_simulate_nhmm_multichannel, 7}, + {"_seqHMM_simulate_mnhmm_singlechannel", (DL_FUNC) &_seqHMM_simulate_mnhmm_singlechannel, 8}, + {"_seqHMM_simulate_mnhmm_multichannel", (DL_FUNC) &_seqHMM_simulate_mnhmm_multichannel, 9}, {"_seqHMM_softmax", (DL_FUNC) &_seqHMM_softmax, 1}, {"_seqHMM_varcoef", (DL_FUNC) &_seqHMM_varcoef, 2}, {"_seqHMM_viterbi", (DL_FUNC) &_seqHMM_viterbi, 4}, diff --git a/src/simulate_nhmm.cpp b/src/simulate_nhmm.cpp new file mode 100644 index 00000000..0caf76a4 --- /dev/null +++ b/src/simulate_nhmm.cpp @@ -0,0 +1,185 @@ +// simulate NHMMs +#include +#include "get_parameters.h" +#include "eta_to_gamma.h" + +// [[Rcpp::export]] +Rcpp::List simulate_nhmm_singlechannel( + const arma::mat& eta_pi, const arma::mat& X_i, + const arma::cube& eta_A, const arma::cube& X_s, + const arma::cube& eta_B, const arma::cube& X_o) { + unsigned int N = X_s.n_slices; + unsigned int T = X_s.n_cols; + unsigned int S = eta_A.n_slices; + unsigned int M = eta_B.n_rows + 1; + arma::umat y(T, N); + arma::umat z(T, N); + arma::mat gamma_pi = eta_to_gamma(eta_pi); + arma::cube gamma_A = eta_to_gamma(eta_A); + arma::cube gamma_B = eta_to_gamma(eta_B); + arma::vec Pi(S); + arma::cube A(S, S, T); + arma::cube B(S, M, T); + arma::uvec seqS = arma::linspace(0, S - 1, S); + arma::uvec seqM = arma::linspace(0, M - 1, M); + for (unsigned int i = 0; i < N; i++) { + Pi = get_pi(gamma_pi, X_i.col(i)); + A = get_A(gamma_A, X_s.slice(i)); + B = get_B(gamma_B, X_o.slice(i), false); + z(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, Pi)); + y(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B.slice(0).row(z(0, i)).t())); + for (unsigned int t = 1; t < T; t++) { + z(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, A.slice(t).row(z(t - 1, i)).t())); + y(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B.slice(t).row(z(t, i)).t())); + } + } + return Rcpp::List::create( + Rcpp::Named("observations") = Rcpp::wrap(y), + Rcpp::Named("states") = Rcpp::wrap(z) + ); +} +// [[Rcpp::export]] +Rcpp::List simulate_nhmm_multichannel( + const arma::mat& eta_pi, const arma::mat& X_i, + const arma::cube& eta_A, const arma::cube& X_s, + const arma::field& eta_B, const arma::cube& X_o, + const arma::uvec& M) { + unsigned int N = X_s.n_slices; + unsigned int T = X_s.n_cols; + unsigned int S = eta_A.n_slices; + unsigned int C = M.n_elem; + + arma::ucube y(C, T, N); + arma::umat z(T, N); + + arma::mat gamma_pi = eta_to_gamma(eta_pi); + arma::cube gamma_A = eta_to_gamma(eta_A); + arma::field gamma_B = eta_to_gamma(eta_B); + arma::vec Pi(S); + arma::cube A(S, S, T); + arma::field B(C); + arma::uvec seqS = arma::linspace(0, S - 1, S); + arma::field seqM(C); + for (unsigned int c = 0; c < C; c++) { + seqM(c) = arma::linspace(0, M(c) - 1, M(c)); + B(c) = arma::cube(M(c) - 1, X_o.n_rows, S); + } + for (unsigned int i = 0; i < N; i++) { + Pi = get_pi(gamma_pi, X_i.col(i)); + A = get_A(gamma_A, X_s.slice(i)); + B = get_B(gamma_B, X_o.slice(i), M, false); + z(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, Pi)); + for (unsigned int c = 0; c < C; c++) { + y(c, 0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM(c), 1, false, B(c).slice(0).row(z(0, i)).t())); + } + for (unsigned int t = 1; t < T; t++) { + z(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, A.slice(t).row(z(t - 1, i)).t())); + for (unsigned int c = 0; c < C; c++) { + y(c, t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM(c), 1, false, B(c).slice(t).row(z(t, i)).t())); + } + } + } + return Rcpp::List::create( + Rcpp::Named("observations") = Rcpp::wrap(y), + Rcpp::Named("states") = Rcpp::wrap(z) + ); +} + +// [[Rcpp::export]] +Rcpp::List simulate_mnhmm_singlechannel( + const arma::field& eta_pi, const arma::mat& X_i, + const arma::field& eta_A, const arma::cube& X_s, + const arma::field& eta_B, const arma::cube& X_o, + const arma::mat& eta_omega, const arma::mat& X_d) { + unsigned int N = X_s.n_slices; + unsigned int T = X_s.n_cols; + unsigned int S = eta_A.n_slices; + unsigned int M = eta_B.n_rows + 1; + unsigned int D = eta_omega.n_rows + 1; + arma::umat y(T, N); + arma::umat z(T, N); + + arma::mat gamma_omega = eta_to_gamma(eta_omega); + arma::field gamma_pi = eta_to_gamma(eta_pi); + arma::field gamma_A = eta_to_gamma(eta_A); + arma::field gamma_B = eta_to_gamma(eta_B); + arma::vec omega(D); + arma::vec Pi(S); + arma::cube A(S, S, T); + arma::cube B(S, M, T); + arma::uvec seqS = arma::linspace(0, S - 1, S); + arma::uvec seqM = arma::linspace(0, M - 1, M); + arma::uvec seqD = arma::linspace(0, D - 1, D); + for (unsigned int i = 0; i < N; i++) { + omega = get_omega(gamma_omega, X_d.col(i)); + unsigned int cluster = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqD, 1, false, omega)); + Pi = get_pi(gamma_pi(cluster), X_i.col(i)); + A = get_A(gamma_A(cluster), X_s.slice(i)); + B = get_B(gamma_B(cluster), X_o.slice(i), false); + z(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, Pi)); + y(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B.slice(0).row(z(0, i)).t())); + for (unsigned int t = 1; t < T; t++) { + z(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, A.slice(t).row(z(t - 1, i)).t())); + y(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B.slice(t).row(z(t, i)).t())); + } + z.col(i) += cluster * S; + } + return Rcpp::List::create( + Rcpp::Named("observations") = Rcpp::wrap(y), + Rcpp::Named("states") = Rcpp::wrap(z) + ); +} + + +// [[Rcpp::export]] +Rcpp::List simulate_mnhmm_multichannel( + const arma::field& eta_pi, const arma::mat& X_i, + const arma::field& eta_A, const arma::cube& X_s, + const arma::field& eta_B, const arma::cube& X_o, + const arma::mat& eta_omega, const arma::mat& X_d, + const arma::uvec& M) { + unsigned int N = X_s.n_slices; + unsigned int T = X_s.n_cols; + unsigned int S = eta_A(0).n_slices; + unsigned int D = eta_omega.n_rows + 1; + unsigned int C = M.n_elem; + arma::ucube y(C, T, N); + arma::umat z(T, N); + arma::mat gamma_omega = eta_to_gamma(eta_omega); + arma::field gamma_pi = eta_to_gamma(eta_pi); + arma::field gamma_A = eta_to_gamma(eta_A); + arma::field gamma_B = eta_to_gamma(eta_B); + arma::vec omega(D); + arma::vec Pi(S); + arma::cube A(S, S, T); + arma::field B(C); + arma::field seqM(C); + for (unsigned int c = 0; c < C; c++) { + seqM(c) = arma::linspace(0, M(c) - 1, M(c)); + B(c) = arma::cube(M(c) - 1, X_o.n_rows, S); + } + arma::uvec seqS = arma::linspace(0, S - 1, S); + arma::uvec seqD = arma::linspace(0, D - 1, D); + for (unsigned int i = 0; i < N; i++) { + omega = get_omega(gamma_omega, X_d.col(i)); + unsigned int cluster = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqD, 1, false, omega)); + Pi = get_pi(gamma_pi(cluster), X_i.col(i)); + A = get_A(gamma_A(cluster), X_s.slice(i)); + B = get_B(gamma_B.rows(cluster * C, (cluster + 1) * C - 1), X_o.slice(i), M, false); + z(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, Pi)); + for (unsigned int c = 0; c < C; c++) { + y(c, 0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM(c), 1, false, B(c).slice(0).row(z(0, i)).t())); + } + for (unsigned int t = 1; t < T; t++) { + z(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, A.slice(t).row(z(t - 1, i)).t())); + for (unsigned int c = 0; c < C; c++) { + y(c, t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM(c), 1, false, B(c).slice(t).row(z(t, i)).t())); + } + } + z.col(i) += cluster * S; + } + return Rcpp::List::create( + Rcpp::Named("observations") = Rcpp::wrap(y), + Rcpp::Named("states") = Rcpp::wrap(z) + ); +} \ No newline at end of file