#' Validate design
#'
#' \code{check_design} validates the specified within and between design and
#' expands it into per-cell parameter tables.
#'
#' @param within a list of the within-subject factors, or a numeric vector of
#'   level counts (each between 2 and 10) that is expanded into automatically
#'   named factors (A, B, ...) with levels A1, A2, ...
#' @param between a list of the between-subject factors, or a numeric vector
#'   of level counts (each between 2 and 10)
#' @param n the number of samples required (a single number, a vector with one
#'   value per cell, a list keyed by between-cell, or a data frame)
#' @param mu the means of the variables (same formats as \code{n})
#' @param sd the standard deviations of the variables (same formats as \code{n})
#' @param r the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector),
#'   or a list of such specifications keyed by between-cell
#'
#' @return a list with the elements \code{within}, \code{between} (level names
#'   per factor), \code{within_factors}, \code{between_factors},
#'   \code{within_labels}, \code{between_labels}, \code{cells_w},
#'   \code{cells_b}, \code{cell_n}, \code{cell_mu}, \code{cell_sd} (data frames
#'   with one row per within-cell and one column per between-cell),
#'   \code{cell_r} (one \code{cormat()} result per between-cell; empty when
#'   there are no within factors) and \code{sub_id} (IDs from \code{make_id()})
#'
#' @examples
#'
#' within <- list(time = c("day", "night"))
#' between <- list(pet = c("dog", "cat"))
#' check_design(within, between)
#'
#' @export
#'
check_design <- function(within = list(), between = list(),
                         n = 100, mu = 0, sd = 1, r = 0) {
  # name anonymous factors: a numeric vector of level counts becomes a list
  # of factors with generated level names
  if (is.numeric(within) && all(within %in% 2:10)) {
    within_names <- LETTERS[seq_along(within)]
    within <- purrr::map2(within_names, within, ~paste0(.x, seq_len(.y)))
    names(within) <- within_names
  }
  if (is.numeric(between) && all(between %in% 2:10)) {
    # continue lettering after the within factors so names do not clash
    between_names <- LETTERS[length(within) + seq_along(between)]
    between <- purrr::map2(between_names, between, ~paste0(.x, seq_len(.y)))
    names(between) <- between_names
  }

  # check factor specification
  if (!is.list(within) || !is.list(between)) {
    stop("within and between must be lists")
  } else if (length(within) == 0 && length(between) == 0) {
    stop("You must specify at least one factor")
  }

  # if within or between factors are named vectors,
  # use their names as column names and values as labels for plots
  between_labels <- purrr::map(between, fix_name_labels)
  between <- lapply(between_labels, names)
  within_labels <- purrr::map(within, fix_name_labels)
  within <- lapply(within_labels, names)

  # check for duplicate factor names, both across and inside the two lists
  # (unnamed factors are ignored here)
  all_factors <- c(names(within), names(between))
  all_factors <- all_factors[nzchar(all_factors)]
  factor_overlap <- unique(all_factors[duplicated(all_factors)])
  if (length(factor_overlap) > 0) {
    stop("You have multiple factors with the same name (",
         paste(factor_overlap, collapse = ", "),
         "). Please give all factors unique names.")
  }

  within_factors <- names(within)
  between_factors <- names(between)

  # handle no w/in or btwn: use a placeholder factor name
  if (length(between_factors) == 0) between_factors <- ".tmpvar."
  if (length(within_factors) == 0) within_factors <- ".tmpvar."

  # check for duplicate level names within any factor
  all_levels <- c(within, between)
  has_dupes <- vapply(all_levels, function(lv) anyDuplicated(lv) > 0,
                      logical(1))
  if (any(has_dupes)) {
    dupelevels <- paste(names(all_levels)[has_dupes], collapse = ", ")
    stop("You have duplicate levels for factor(s): ", dupelevels)
  }

  # define columns: one cell per combination of factor levels,
  # with the level names joined by tidyr::unite()
  if (length(within) == 0) {
    cells_w <- "val"
  } else {
    cells_w <- do.call(expand.grid, within) %>%
      tidyr::unite("b", 1:ncol(.)) %>%
      dplyr::pull("b")
  }
  if (length(between) == 0) {
    cells_b <- ".tmpvar."
  } else {
    cells_b <- do.call(expand.grid, between) %>%
      tidyr::unite("b", 1:ncol(.)) %>%
      dplyr::pull("b")
  }

  # convert n, mu and sd from vector/list formats
  cell_n <- convert_param(n, cells_w, cells_b, "Ns")
  cell_mu <- convert_param(mu, cells_w, cells_b, "means")
  cell_sd <- convert_param(sd, cells_w, cells_b, "SDs")

  # figure out number of subjects and their IDs
  # (the first within-cell row holds the n of every between-cell)
  sub_n <- sum(cell_n[1, ])
  sub_id <- make_id(sub_n)

  # set up cell correlations from r (number, vector, matrix or list styles)
  cell_r <- list()
  if (length(within) > 0) {
    for (cell in cells_b) {
      cell_cor <- if (is.list(r)) r[[cell]] else r
      cell_r[[cell]] <- cormat(cell_cor, length(cells_w))
    }
  }

  list(
    within = within,
    between = between,
    within_factors = within_factors,
    between_factors = between_factors,
    within_labels = within_labels,
    between_labels = between_labels,
    cells_w = cells_w,
    cells_b = cells_b,
    cell_n = cell_n,
    cell_mu = cell_mu,
    cell_sd = cell_sd,
    cell_r = cell_r,
    sub_id = sub_id
  )
}

#' Convert parameter
#'
#' Converts parameter specification from vector, list or data frame format
#' into a data frame with one row per within-cell and one column per
#' between-cell.
#'
#' @param param the parameter (mu, sd, or n): a single number, a numeric
#'   vector with one value per cell, a list keyed by between-cell, or a data
#'   frame whose row and column names are the cell names (either orientation)
#' @param cells_w a list of within-subject cells combinations
#' @param cells_b a list of between-subject cell combinations
#' @param type the name of the parameter (for error messages)
#'
#' @return a data frame
#' @keywords internal
#'
convert_param <- function(param, cells_w, cells_b, type = "this parameter") {
  w_n <- length(cells_w)
  b_n <- length(cells_b)
  all_n <- b_n * w_n

  # TRUE when every label is one of the expected cell names
  all_in <- function(labels, cells) length(setdiff(labels, cells)) == 0

  if (is.data.frame(param)) { # convert to list first
    # check for row/column confusion
    cols_are_b <- all_in(names(param), cells_b)
    rows_are_w <- all_in(rownames(param), cells_w)
    cols_are_w <- all_in(names(param), cells_w)
    rows_are_b <- all_in(rownames(param), cells_b)
    if (cols_are_b && rows_are_w) {
      # check this first in case rows and cols are the same labels
      row_labels <- rownames(param)
    } else if (cols_are_w && rows_are_b) {
      # transpose so that columns are between-cells
      param <- as.data.frame(t(param))
      row_labels <- rownames(param)
    } else {
      stop("The ", type, " dataframe is misspecified.")
    }
    # one vector per between-cell, named by within-cell
    param <- lapply(as.list(param), stats::setNames, row_labels)
  }

  if (is.list(param)) {
    # the number of cells is small, so plain concatenation is fine here
    param2 <- c()
    # add param in right order
    for (f in cells_b) {
      cell_param <- param[[f]]
      if (length(cell_param) == 1) {
        new_param <- rep(cell_param, w_n)
      } else if (length(cell_param) != w_n) {
        stop("The number of ", type, " for cell ", f,
             " is not correct. Please specify either 1 or a vector of ",
             w_n, " per cell")
      } else if (all_in(cells_w, names(cell_param))) {
        new_param <- cell_param[cells_w] # add named parameters in the right order
      } else {
        new_param <- cell_param # parameters are not or incorrectly named, add in this order
      }
      param2 <- c(param2, new_param)
    }

    # NOTE(review): check_design() always passes at least one between-cell
    # (the ".tmpvar." placeholder), so this branch looks unreachable from
    # there -- confirm whether another caller relies on it.
    if (length(cells_b) == 0) { # no between-subject factors
      message("no between-subject factors")
      if (length(param) == 1) {
        param2 <- rep(param, w_n)
      } else if (length(param) != w_n) {
        stop("The number of ", type,
             " is not correct. Please specify either 1 or a vector of ",
             w_n, " per cell")
      } else if (all_in(cells_w, names(param))) {
        param2 <- param[cells_w] # add named parameters in the right order
      } else {
        param2 <- param # parameters are not or incorrectly named, add in this order
      }
    }
  } else if (is.numeric(param)) {
    if (length(param) == 1) {
      param2 <- rep(param, all_n)
    } else if (length(param) == all_n) {
      param2 <- param
    } else {
      stop("The number of ", type, " is not correct. Please specify 1, a vector of ",
           all_n, ", or use the list format")
    }
  } else {
    # without this branch param2 would be undefined below and the caller
    # would only see "object 'param2' not found"
    stop("The specification of ", type, " is not valid. ",
         "Please use a number, a numeric vector, a list, or a data frame")
  }

  # values are filled column-wise: one column per between-cell
  dd <- as.data.frame(matrix(param2, ncol = b_n))
  names(dd) <- cells_b
  rownames(dd) <- cells_w

  dd
}


#' Get design from long data
#'
#' Makes a best guess at the design of a long-format data frame.
#' Finds all columns that contain a single value per unit of analysis (between factors),
#' all columns that contain the same values per unit of analysis (within factors), and
#' all columns that differ over units of analysis (dv, continuous factors)
#'
#' @param .data the data frame (in long format)
#' @param id the column name(s) that identify a unit of analysis
#' @param dv the column name that identifies the DV
#'
#' @return a design list with the same element names as the list returned by
#'   \code{check_design()}. Here \code{sub_id} holds the \code{id} column
#'   name(s) as given, and \code{cell_n}, \code{cell_mu}, \code{cell_sd} have
#'   one row per between-cell and one column per variable.
#'
#' @export
#'
get_design_long <- function(.data, id = "sub_id", dv = "val") {
  # between factors: columns with exactly one distinct value in every unit
  between_factors <- .data %>%
    dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
    dplyr::summarise_all(dplyr::n_distinct) %>%
    dplyr::ungroup() %>%
    dplyr::select(-tidyselect::one_of(id)) %>%
    dplyr::summarise_all(max) %>%
    dplyr::select_if(~ . == 1) %>%
    names()

  # within factors: remaining columns whose sequence of values
  # is the same in every unit
  within_factors <- .data %>%
    dplyr::select(-tidyselect::one_of(between_factors)) %>%
    dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
    dplyr::summarise_all(paste0, collapse = ",") %>%
    dplyr::ungroup() %>%
    dplyr::select(-tidyselect::one_of(id)) %>%
    dplyr::summarise_all(dplyr::n_distinct) %>%
    dplyr::select_if(~ . == 1) %>%
    names()

  # named list of level names per factor; levels are read directly so that
  # level names containing ".|." are not split apart
  factor_levels <- function(cols) {
    .data %>%
      dplyr::select(tidyselect::one_of(cols)) %>%
      lapply(function(col) levels(as.factor(col)))
  }
  within <- factor_levels(within_factors)
  between <- factor_levels(between_factors)

  between_labels <- purrr::map(between, fix_name_labels)
  within_labels <- purrr::map(within, fix_name_labels)

  cells_b <- do.call(expand.grid, between) %>%
    tidyr::unite("b", 1:ncol(.)) %>%
    dplyr::pull("b")

  cells_w <- do.call(expand.grid, within) %>%
    tidyr::unite("b", 1:ncol(.)) %>%
    dplyr::pull("b")

  # get n, mu, sd, r per cell
  chk <- check_sim_stats(.data, between_factors, within_factors, dv, id)

  # NOTE(review): the temporary ".within" column below is built from the
  # between factors, so these tables have between-cells as rows, whereas
  # convert_param() puts within-cells in rows -- confirm that consumers
  # accept this orientation.
  n <- chk %>%
    tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
    dplyr::select(.within, var, n) %>%
    tidyr::spread(var, n) %>%
    tibble::column_to_rownames(".within")

  mu <- chk %>%
    tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
    dplyr::select(.within, var, mean) %>%
    tidyr::spread(var, mean) %>%
    tibble::column_to_rownames(".within")

  sd <- chk %>%
    tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
    dplyr::select(.within, var, sd) %>%
    tidyr::spread(var, sd) %>%
    tibble::column_to_rownames(".within")

  # one nested correlation table per between-cell,
  # with rows ordered like cells_w
  cors <- chk %>%
    tidyr::unite(".between", tidyselect::one_of(between_factors)) %>%
    dplyr::select(tidyselect::one_of(c(".between", "var", cells_w))) %>%
    dplyr::mutate(var = forcats::fct_relevel(var, cells_w)) %>%
    dplyr::arrange(var) %>%
    dplyr::group_by(.between) %>%
    tidyr::nest(.key = "r") %>%
    as.list()

  r <- purrr::map(cors$r, ~tibble::column_to_rownames(., "var"))
  names(r) <- cors$.between

  design <- list(
    within = within,
    between = between,
    within_factors = within_factors,
    between_factors = between_factors,
    within_labels = within_labels,
    between_labels = between_labels,
    cells_w = cells_w,
    cells_b = cells_b,
    cell_n = n,
    cell_mu = mu,
    cell_sd = sd,
    cell_r = r,
    sub_id = id
  )

  design
}
@@ #' #' \code{check_sim_stats} Generates a table of the correlations and means of numeric columns in a data frame #' -#' @param dat the existing dataframe -#' @param grp_by an optional list of column names to group by +#' @param .data the existing tbl +#' @param between a vector of column names for between-subject factors +#' @param within a vector of column names for within-subject factors (if data is long) +#' @param dv the column name of the dv (if data is long) +#' @param id the column name(s) of the subject ID (if data is long) #' @param digits how many digits to round to (default = 2) #' @param usekable logical. If TRUE, output with knitr::kable #' -#' @return tibble or kable +#' @return a tbl or kable #' @examples #' check_sim_stats(iris, "Species") #' @export +#' -check_sim_stats <- function(dat, grp_by = NULL, digits = 2, usekable = FALSE) { - grpdat <- select_num_grp(dat, grp_by) +check_sim_stats <- function(.data, between = c(), within = c(), dv = c(), id = c(), + digits = 2, usekable = FALSE) { + + if (length(within) && length(dv) && length(id)) { + # convert long to wide + .data <- long2wide(.data, within, between, dv, id) %>% + dplyr::select(-tidyselect::one_of(id)) + } + + grpdat <- select_num_grp(.data, between) grpvars <- dplyr::group_vars(grpdat) numvars <- names(grpdat)[!names(grpdat) %in% grpvars] @@ -27,14 +39,14 @@ check_sim_stats <- function(dat, grp_by = NULL, digits = 2, usekable = FALSE) { tidyr::spread(stat, val) stats <- grpdat %>% - tidyr::nest(dplyr::one_of(numvars), .key = "multisim_data") %>% + tidyr::nest(tidyselect::one_of(numvars), .key = "multisim_data") %>% dplyr::mutate(multisim_cor = purrr::map(multisim_data, function(d) { cor(d) %>% round(digits) %>% tibble::as_tibble(rownames = "var") })) %>% dplyr::select(-multisim_data) %>% tidyr::unnest(multisim_cor) %>% - dplyr::left_join(descriptives, by = c(grp_by, "var")) %>% - dplyr::select(tidyselect::one_of(c(grp_by, "n", "var", numvars, "mean", "sd"))) + 
#' Long to wide format
#'
#' Converts data from long format to wide
#'
#' @param .data the tbl in long format
#' @param within the names of the within column(s)
#' @param between the names of between column(s) (optional)
#' @param dv the name of the DV (value) column
#' @param id the names of the column(s) for grouping observations
#'
#' @return a tbl in wide format
#'
#' @examples
#' df_long <- sim_design(2, 2, long = TRUE)
#' long2wide(df_long, "A", "B", "val", "sub_id")
#'
#' @export
#'
long2wide <- function(.data, within = c(), between = c(), dv = "val", id = "sub_id") {
  wide <- dplyr::select(.data, tidyselect::one_of(c(id, between, within, dv)))

  # Empty selections are never handed to one_of(): `between` is optional and
  # defaults to c(), and an empty one_of() may be rejected depending on the
  # installed tidyselect version.
  if (length(within) == 0) {
    # no within columns means there is nothing to spread
    return(dplyr::ungroup(wide))
  }

  # combine all within columns into a single key column
  wide <- tidyr::unite(wide, ".tmpwithin.", tidyselect::one_of(within))

  if (length(between) > 0) {
    wide <- dplyr::group_by_at(wide, dplyr::vars(tidyselect::one_of(between)))
  }

  # one column per within-cell, filled with the DV
  wide %>%
    tidyr::spread(".tmpwithin.", !!dplyr::quo(dv)) %>%
    dplyr::ungroup()
}

#' Wide to long format
#'
#' Converts data from wide format to long
#'
#' @param .data the tbl in wide format
#' @param within_factors the names of the within factors
#' @param within_cols the names (or indices) of the within-subject (value) columns
#' @param sep Separator for within-columns (see tidyr::separate)
#'
#' @return a tbl in long format, with the values in a column named \code{val}
#'
#' @examples
#' wide2long(iris, c("Feature", "Measure"), 1:4)
#'
#' @export
#'
wide2long <- function(.data, within_factors = c(), within_cols = c(), sep = "[^[:alnum:]]+") {
  # column positions are translated to names so one_of() can match them
  if (is.numeric(within_cols)) {
    within_cols <- names(.data)[within_cols]
  }

  # stack the value columns, then split the former column names
  # into one column per within factor
  .data %>%
    tidyr::gather(".tmpwithin.", "val", tidyselect::one_of(within_cols)) %>%
    tidyr::separate(".tmpwithin.", within_factors, sep = sep)
}
#' Make ID
#'
#' Make IDs with fixed length and a letter prefix for random effects (e.g., S001, S002, ..., S100).
#' @param n the number of IDs to generate (or a vector of numbers)
#' @param prefix the letter prefix to the number
#' @param digits the number of digits to use for the numeric part. Only used if this is larger than the number of digits in n.
#'
#' @return a vector of IDs (empty if \code{n} is 0 or has length 0)
#' @export
#'
#' @examples
#'
#' make_id(20, "SUBJECT_")
#' make_id(10:30, digits = 3)
#'
make_id <- function(n = 100, prefix = "S", digits = 0) {
  if (length(n) == 1) {
    # a single number is a count: number the IDs 1..n
    # (seq_len() gives no IDs for n = 0, where 1:n would give c(1, 0))
    max_n <- n
    n <- seq_len(max_n)
  } else if (length(n) > 1) {
    max_n <- max(n)
  }

  # formatC() returns "" for zero-length input, so handle "no IDs" here
  if (length(n) == 0) {
    return(character(0))
  }

  # set max digits to the larger of digits in `n` or `digits`
  max_digits <- max(floor(log10(max_n)) + 1, digits)

  # format = "d" forces integer formatting; the default for doubles
  # would switch to scientific notation for large numbers (e.g. "1e+05")
  paste0(prefix, formatC(n, width = max_digits, format = "d", flag = "0"))
}
If true, mu, sd and cors specify the empirical not population mean, sd and covariance +#' @param r the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector) +#' @param varnames optional names for the variables (string vector of length vars) defaults if r is a matrix with column names +#' @param empirical logical. If true, mu, sd and r specify the empirical not population mean, sd and covariance #' @param as.matrix logical. If true, returns a matrix +#' @param cors (deprecated; use r) +#' +#' @return a tbl of vars vectors #' -#' @return dataframe of vars vectors #' @examples -#' rnorm_multi(100, 3, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C")) -#' rnorm_multi(100, 3, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C")) +#' rnorm_multi(100, 3, 0, 1, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C")) +#' rnorm_multi(100, 3, 0, 1, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C")) +#' #' @export -rnorm_multi <- function(n, vars = 3, cors = 0, mu = 0, sd = 1, +rnorm_multi <- function(n, vars = 3, mu = 0, sd = 1, r = 0, varnames = NULL, empirical = FALSE, - as.matrix = FALSE) { + as.matrix = FALSE, + cors = NULL) { + if (!is.null(cors)) { + warning("cors is deprecated, please use r") + if (r == 0) r = cors # set r to cors if r is not set + } + # error handling if ( !is.numeric(n) || n %% 1 > 0 || n < 3 ) { stop("n must be an integer > 2") @@ -42,7 +51,7 @@ rnorm_multi <- function(n, vars = 3, cors = 0, mu = 0, sd = 1, stop("the length of sd must be 1 or vars"); } - cor_mat <- cormat(cors, vars) + cor_mat <- cormat(r, vars) sigma <- (sd %*% t(sd)) * cor_mat bvn <- MASS::mvrnorm(n, mu, sigma, empirical = empirical) @@ -51,7 +60,7 @@ rnorm_multi <- function(n, vars = 3, cors = 0, mu = 0, sd = 1, if (length(varnames) == vars) { colnames(bvn) <- varnames } else if (!is.null(colnames(cor_mat))) { - # if cors was a matrix with names, use that + # if r was a matrix with names, 
use that colnames(bvn) <- colnames(cor_mat) } diff --git a/R/rnorm_pre.R b/R/rnorm_pre.R index f8709878..9a1d19fc 100644 --- a/R/rnorm_pre.R +++ b/R/rnorm_pre.R @@ -3,18 +3,18 @@ #' \code{rnorm_pre} Produces a random normally distributed vector with the specified correlation to an existing vector #' #' @param x the existing vector -#' @param rho desired correlation between existing and returned vectors -#' @param ymean desired mean of returned vector -#' @param ysd desired SD of returned vector +#' @param mu desired mean of returned vector +#' @param sd desired SD of returned vector +#' @param r desired correlation between existing and returned vectors #' #' @return vector #' @examples #' v1 <- rnorm(10) -#' v2 <- rnorm_pre(v1, 0.5, 0, 1) +#' v2 <- rnorm_pre(v1, 0, 1, 0.5) #' cor(v1, v2) #' @export -rnorm_pre <- function (x, rho=0, ymean=0, ysd=1) { +rnorm_pre <- function (x, mu=0, sd=1, r=0) { # error checking if (!is.vector(x)) stop("x must be a vector") if (!is.numeric(x)) stop("x must be numeric") @@ -22,9 +22,9 @@ rnorm_pre <- function (x, rho=0, ymean=0, ysd=1) { n <- length(x) y <- stats::rnorm(n) - z <- rho * scale(x)[,1] + sqrt(1 - rho^2) * + z <- r * scale(x)[,1] + sqrt(1 - r^2) * scale(stats::resid(stats::lm(y ~ x)))[,1] - yresult <- ymean + ysd * z + yresult <- mu + sd * z return(yresult) } diff --git a/R/select_num_grp.R b/R/select_num_grp.R index 8a1076e5..ac6c0a50 100644 --- a/R/select_num_grp.R +++ b/R/select_num_grp.R @@ -2,52 +2,52 @@ #' #' \code{select_num_grp} Select grouping and (optionally specified) numeric columns and group #' -#' @param dat the existing dataframe -#' @param grp_by an optional list of column names to group by +#' @param .data the existing tbl +#' @param between an optional list of column names to group by #' @param cols an optional list of column names to return (default of NULL returns all numeric columns) #' -#' @return tibble +#' @return a tbl #' @examples #' select_num_grp(iris, "Species") #' @export -select_num_grp <- 
function(dat, grp_by = NULL, cols = NULL) { +select_num_grp <- function(.data, between = c(), cols = NULL) { # error checking ----------------- - if (is.matrix(dat)) { - dat = as.data.frame(dat) - } else if (!is.data.frame(dat)) { - stop("dat must be a data frame or matrix") + if (is.matrix(.data)) { + .data = as.data.frame(.data) + } else if (!is.data.frame(.data)) { + stop(".data must be a data frame or matrix") } # select only grouping and numeric columns ----------------- - if (is.null(grp_by)) { + if (is.null(between)) { # no grouping, so select all numeric columns - numdat <- dplyr::select_if(dat, is.numeric) + numdat <- dplyr::select_if(.data, is.numeric) grpdat <- numdat - } else if (is.numeric(grp_by) || is.character(grp_by)) { + } else if (is.numeric(between) || is.character(between)) { # get grouping column names if specified by index - if (is.numeric(grp_by)) grp_by <- names(dat)[grp_by] + if (is.numeric(between)) between <- names(.data)[between] # numeric columns, excluding grouping columns - numdat <- dat %>% - dplyr::select(-tidyselect::one_of(grp_by)) %>% + numdat <- .data %>% + dplyr::select(-tidyselect::one_of(between)) %>% dplyr::select_if(is.numeric) # get grouping columns, add remaining numeric columns, and group - grpdat <- dat %>% - dplyr::select(tidyselect::one_of(grp_by)) %>% + grpdat <- .data %>% + dplyr::select(tidyselect::one_of(between)) %>% dplyr::bind_cols(numdat) %>% - dplyr::group_by_at(dplyr::vars(tidyselect::one_of(grp_by))) + dplyr::group_by_at(dplyr::vars(tidyselect::one_of(between))) } else { - stop("grp_by must be a numeric or character vector") + stop("between must be a numeric or character vector") } if (!is.null(cols)) { # return only grouping and cols - if (is.numeric(cols)) cols <- names(dat)[cols] + if (is.numeric(cols)) cols <- names(.data)[cols] grpdat <- grpdat %>% - dplyr::select(tidyselect::one_of(c(grp_by, cols))) + dplyr::select(tidyselect::one_of(c(between, cols))) } return(grpdat) diff --git a/R/sim_design.R 
b/R/sim_design.R index d172da6e..103d530a 100644 --- a/R/sim_design.R +++ b/R/sim_design.R @@ -1,27 +1,29 @@ #' Simulate Data from Design #' -#' \code{sim_design} generates a dataframe with a specified within and between design +#' \code{sim_design()} generates a data table with a specified within and between design. #' #' @param within a list of the within-subject factors #' @param between a list of the between-subject factors #' @param n the number of samples required -#' @param cors the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector) #' @param mu a vector giving the means of the variables (numeric vector of length 1 or vars) #' @param sd the standard deviations of the variables (numeric vector of length 1 or vars) -#' @param empirical logical. If true, mu, sd and cors specify the empirical not population mean, sd and covariance -#' @param frame_long Whether the returned dataframe is in wide (default = FALSE) or long (TRUE) format +#' @param r the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector) +#' @param empirical logical. 
If true, mu, sd and r specify the empirical not population mean, sd and covariance +#' @param long Whether the returned tbl is in wide (default = FALSE) or long (TRUE) format #' -#' @return dataframe +#' @return a tbl #' #' @export #' sim_design <- function(within = list(), between = list(), - n = 100, cors = 0, mu = 0, sd = 1, - empirical = FALSE, frame_long = FALSE) { + n = 100, mu = 0, sd = 1, r = 0, + empirical = FALSE, long = FALSE) { # check the design is specified correctly - design <- check_design(within, between, n, cors, mu, sd) + design <- check_design(within = within, between = between, + n = n, mu = mu, sd = sd, r = r) + # simulate the data - sim_design_(design, empirical = empirical, frame_long = frame_long) + sim_design_(design, empirical = empirical, long = long) } #' Fix name labels @@ -41,227 +43,27 @@ fix_name_labels <- function(x) { x } - -#' Convert parameter -#' -#' Converts parameter specification from vector or list format -#' -#' @param param the parameter (mu, sd, or n) -#' @param cells_b a list of between-subject cell combinations -#' @param cells_w a list of within-subject cells combinations -#' @param type the name of the parameter (for error messages) -#' -#' @return a data frame -#' -convert_param <- function (param, cells_b, cells_w, type = "this parameter") { - w_n <- length(cells_w) - b_n <- length(cells_b) - all_n <- b_n*w_n - - if (is.data.frame(param)) { # convert to list first - # check for row/column confusion - cols_are_b <- setdiff(names(param), cells_b) %>% length() == 0 - rows_are_w <- setdiff(rownames(param), cells_w) %>% length() == 0 - cols_are_w <- setdiff(names(param), cells_w) %>% length() == 0 - rows_are_b <- setdiff(rownames(param), cells_b) %>% length() == 0 - if (cols_are_b && rows_are_w) { - # check this first in case rows and cols are the same labels - param <- as.list(param) %>% lapply(magrittr::set_names, rownames(param)) - } else if (cols_are_w && rows_are_b) { - param <- t(param) %>% as.data.frame() - param 
<- as.list(param) %>% lapply(magrittr::set_names, rownames(param)) - } else { - stop("The ", type, " dataframe is misspecified.") - } - } - - if (is.list(param)) { - param2 <- c() - # add param in right order - for (f in cells_b) { - if (length(param[[f]]) == 1) { - new_param <- rep(param[[f]], w_n) - } else if (length(param[[f]]) != w_n) { - stop("The number of ", type, " for cell ", f, - " is not correct. Please specify either 1 or a vector of ", - w_n, " per cell") - } else if (setdiff(cells_w, names(param[[f]])) %>% length() == 0) { - new_param <- param[[f]][cells_w] # add named parameters in the right order - } else { - new_param <- param[[f]] # parameters are not or incorrectly named, add in this order - } - param2 <- c(param2, new_param) - } - - if (length(cells_b) == 0) { # no between-subject factors - message("no between-subject factors") - if (length(param) == 1) { - param2 <- rep(param, w_n) - } else if (length(param) != w_n) { - stop("The number of ", type, - " is not correct. Please specify either 1 or a vector of ", - w_n, " per cell") - } else if (setdiff(cells_w, names(param)) %>% length() == 0) { - param2 <- param[cells_w] # add named parameters in the right order - } else { - param2 <- param # parameters are not or incorrectly named, add in this order - } - } - } else if (is.numeric(param)) { - if (length(param) == 1) { - param2 <- rep(param, all_n) - } else if (length(param) == all_n) { - param2 <- param - } else { - stop("The number of ", type, " is not correct. 
Please specify 1, a vector of ", - all_n , ", or use the list format") - } - } - - dd <- matrix(param2, ncol = b_n) %>% as.data.frame() - names(dd) <- cells_b - rownames(dd) <- cells_w - - dd -} - -#' Validate design -#' -#' \code{check_design} validates the specified within and between design -#' -#' @param within a list of the within-subject factors -#' @param between a list of the between-subject factors -#' @param n the number of samples required -#' @param cors the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector) -#' @param mu a vector giving the means of the variables (numeric vector of length 1 or vars) -#' @param sd the standard deviations of the variables (numeric vector of length 1 or vars) -#' -#' @return list -#' -#' @examples -#' -#' within <- list(time = c("day", "night")) -#' between <- list(pet = c("dog", "cat")) -#' design <- check_design(within, between) -#' -#' @export -#' -check_design <- function(within = list(), between = list(), - n = 100, cors = 0, mu = 0, sd = 1) { - # error checking - if (!is.list(within) || !is.list(between)) { - stop("within and between must be lists") - } else if (length(within) == 0 && length(between) == 0) { - stop("You must specify at least one factor") - } - - # if within or between factors are named vectors, - # use their names as column names and values as labels for plots - between_labels <- purrr::map(between, fix_name_labels) - between <- lapply(between_labels, names) - within_labels <- purrr::map(within, fix_name_labels) - within <- lapply(within_labels, names) - - within_factors <- names(within) - between_factors <- names(between) - - # handle no w/in or btwn - if (length(between_factors) == 0) between_factors <- ".tmpvar." - if (length(within_factors) == 0) within_factors <- ".tmpvar." 
- - # check for duplicate factor names - factor_overlap <- intersect(within_factors, between_factors) - if (length(factor_overlap)) { - stop("You have multiple factors with the same name (", - paste(factor_overlap, collapse = ", "), - "). Please give all factors unique names.") - } - - # check for duplicate level names within any factor - dupes <- c(within, between) %>% - lapply(duplicated) %>% - lapply(sum) %>% - lapply(as.logical) %>% - unlist() - - if (sum(dupes)) { - dupelevels <- c(within, between) %>% - names() %>% - magrittr::extract(dupes) %>% - paste(collapse = ", ") - stop("You have duplicate levels for factor(s): ", dupelevels) - } - - # define columns - if (length(within) == 0) { - cells_w = "val" - } else { - cells_w <- do.call(expand.grid, within) %>% - tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b") - } - if (length(between) == 0) { - cells_b = ".tmpvar." - } else { - cells_b <- do.call(expand.grid, between) %>% - tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b") - } - - # convert n, mu and sd from vector/list formats - cell_n <- convert_param(n, cells_b, cells_w, "Ns") - cell_mu <- convert_param(mu, cells_b, cells_w, "means") - cell_sd <- convert_param(sd, cells_b, cells_w, "SDs") - - # figure out number of subjects and their IDs - sub_n <- sum(cell_n[1,]) - max_digits <- floor(log10(sub_n))+1 - sub_id <- paste0("S",formatC(1:sub_n, width = max_digits, flag = "0")) - - # set up cell correlations from cors (number, vector, matrix or list styles) - cell_cors <- list() - if (length(within)) { - for (cell in cells_b) { - cell_cor <- if(is.list(cors)) cors[[cell]] else cors - cell_cors[[cell]] <- cormat(cell_cor, length(cells_w)) - } - } - - list( - within = within, - between = between, - within_factors = within_factors, - between_factors = between_factors, - within_labels = within_labels, - between_labels = between_labels, - cell_n = cell_n, - cell_mu = cell_mu, - cell_sd = cell_sd, - cell_cors = cell_cors, - cells_w = cells_w, - cells_b = cells_b, - 
sub_id = sub_id - ) -} - #' Simulate Data from Design #' -#' \code{sim_from_design} generates a dataframe with a specified design +#' \code{sim_design_} generates a data table with a specified design #' #' @param design A list of design parameters created by check_design() -#' @param empirical logical. If true, mu, sd and cors specify the empirical not population mean, sd and covariance -#' @param frame_long Whether the returned dataframe is in wide (default = FALSE) or long (TRUE) format +#' @param empirical logical. If true, mu, sd and r specify the empirical not population mean, sd and covariance +#' @param long Whether the returned tbl is in wide (default = FALSE) or long (TRUE) format #' -#' @return dataframe +#' @return a tbl #' @keywords internal #' -sim_design_ <- function(design, empirical = FALSE, frame_long = FALSE) { +sim_design_ <- function(design, empirical = FALSE, long = FALSE) { list2env(design, envir = environment()) # simulate data for each between-cell for (cell in cells_b) { if (length(within)) { cell_vars <- rnorm_multi( - cell_n[1,cell], length(cells_w), cell_cors[[cell]], - cell_mu[[cell]], cell_sd[[cell]], cells_w, empirical + cell_n[1,cell], length(cells_w), + cell_mu[[cell]], cell_sd[[cell]], cell_r[[cell]], + cells_w, empirical ) %>% dplyr::mutate("btwn" = cell) } else { @@ -298,7 +100,7 @@ sim_design_ <- function(design, empirical = FALSE, frame_long = FALSE) { # df_wide[[f]] <- factor(df_wide[[f]], levels = between[[f]]) #} - if (frame_long == TRUE && length(within)) { + if (long == TRUE && length(within)) { # not necessary for fully between designs col_order <- c("sub_id", between_factors, within_factors, "val") %>% setdiff(".tmpvar.") diff --git a/R/sim_df.R b/R/sim_df.R new file mode 100644 index 00000000..a7d222c5 --- /dev/null +++ b/R/sim_df.R @@ -0,0 +1,48 @@ +#' Simulate an existing dataframe +#' +#' \code{sim_df} Produces a data table with the same distributions and correlations as an existing data table. Only returns numeric 
columns and simulates all numeric variables from a continuous normal distribution (for now). +#' +#' @param .data the existing tbl (must be in wide format) +#' @param n the number of samples to return per group +#' @param between a list of the between-subject columns +#' @param empirical logical. Passed on to rnorm_multi +#' @param grp_by (deprecated; use between) +#' +#' @return a tbl +#' @examples +#' iris100 <- sim_df(iris, 100) +#' iris_species <- sim_df(iris, 100, between = "Species") +#' @export + +sim_df <- function (.data, n = 100, between = c(), empirical = FALSE, grp_by = NULL) { + # error checking + if ( !is.numeric(n) || n %% 1 > 0 || n < 3 ) { + stop("n must be an integer > 2") + } + + if (!is.null(grp_by)) { + warning("grp_by is deprecated, please use between") + if (is.null(between)) between = grp_by # set between to grp_by if between is not set + } + + grpdat <- select_num_grp(.data, between) + + simdat <- grpdat %>% + tidyr::nest() %>% + dplyr::mutate(newsim = purrr::map(data, function(data) { + rnorm_multi( + n = n, + vars = ncol(data), + mu = t(dplyr::summarise_all(data, mean)), + sd = t(dplyr::summarise_all(data, stats::sd)), + r = stats::cor(data), + varnames = names(data), + empirical = empirical + ) + })) %>% + dplyr::select(-data) %>% + tidyr::unnest(newsim) %>% + dplyr::ungroup() + + return(simdat) +} diff --git a/R/simdf_mixed.R b/R/sim_mixed_df.R similarity index 70% rename from R/simdf_mixed.R rename to R/sim_mixed_df.R index 6b084c31..646b59b1 100644 --- a/R/simdf_mixed.R +++ b/R/sim_mixed_df.R @@ -1,36 +1,36 @@ #' Generate a sample with random intercepts for subjects and items #' -#' \code{simdf_mixed} Produces a dataframe with the same distributions of by-subject and by-item random intercepts as an existing dataframe +#' \code{sim_mixed_df()} produces a data table with the same distributions of by-subject and by-item random intercepts as an existing data table. 
#' -#' @param dat the existing dataframe +#' @param .data the existing tbl #' @param sub_n the number of subjects to simulate #' @param item_n the number of items to simulate #' @param dv the column name or index containing the DV #' @param sub_id the column name or index for the subject IDs #' @param item_id the column name or index for the item IDs #' -#' @return tibble +#' @return a tbl #' @examples -#' \donttest{simdf_mixed(faceratings, 10, 10, "rating", "rater_id", "face_id")} +#' \donttest{sim_mixed_df(faceratings, 10, 10, "rating", "rater_id", "face_id")} #' @export -simdf_mixed <- function(dat, sub_n = 100, item_n = 25, +sim_mixed_df <- function(.data, sub_n = 100, item_n = 25, dv = 1, sub_id = 2, item_id = 3) { # error checking ------------------------------------------------------------- - if (is.matrix(dat)) { - dat = as.data.frame(dat) - } else if (!is.data.frame(dat)) { - stop("dat must be a data frame or matrix") + if (is.matrix(.data)) { + .data = as.data.frame(.data) + } else if (!is.data.frame(.data)) { + stop(".data must be a data frame or matrix") } # get column names if specified by index - if (is.numeric(dv)) dv <- names(dat)[dv] - if (is.numeric(sub_id)) sub_id <- names(dat)[sub_id] - if (is.numeric(item_id)) item_id <- names(dat)[item_id] + if (is.numeric(dv)) dv <- names(.data)[dv] + if (is.numeric(sub_id)) sub_id <- names(.data)[sub_id] + if (is.numeric(item_id)) item_id <- names(.data)[item_id] lmer_formula <- paste0(dv, " ~ 1 + (1 | ", sub_id, ") + (1 | ", item_id, ")") %>% stats::as.formula() - mod <- lme4::lmer(lmer_formula, data = dat) + mod <- lme4::lmer(lmer_formula, data = .data) grand_i <- lme4::fixef(mod) sds <- lme4::VarCorr(mod) %>% as.data.frame() diff --git a/R/simdf.R b/R/simdf.R deleted file mode 100644 index 1b02cc2d..00000000 --- a/R/simdf.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Simulate an existing dataframe -#' -#' \code{simdf} Produces a dataframe with the same distributions and correlations as an existing dataframe. 
Only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). -#' -#' @param dat the existing dataframe -#' @param n the number of samples to return per group -#' @param grp_by an optional list of column names to group by -#' @param empirical logical. Passed on to rnorm_multi -#' -#' @return tibble -#' @examples -#' iris100 <- simdf(iris, 100) -#' iris_species <- simdf(iris, 100, "Species") -#' @export - -simdf <- function (dat, n=100, grp_by=NULL, empirical = FALSE) { - # error checking - if ( !is.numeric(n) || n %% 1 > 0 || n < 3 ) { - stop("n must be an integer > 2") - } - - grpdat <- select_num_grp(dat, grp_by) - - simdat <- grpdat %>% - tidyr::nest() %>% - dplyr::mutate(newsim = purrr::map(data, function(data) { - rnorm_multi( - n = n, - vars = ncol(data), - cor = stats::cor(data), - mu = t(dplyr::summarise_all(data, mean)), - sd = t(dplyr::summarise_all(data, stats::sd)), - varnames = names(data), - empirical = empirical - ) - })) %>% - dplyr::select(-data) %>% - tidyr::unnest(newsim) %>% - dplyr::ungroup() - - return(simdat) -} diff --git a/README.Rmd b/README.Rmd index d90a152f..c2c1f805 100644 --- a/README.Rmd +++ b/README.Rmd @@ -5,192 +5,144 @@ always_allow_html: yes + +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) + + + ```{r setup, include = FALSE} knitr::opts_chunk$set( + fig.width = 8, + fig.height = 5, collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" + comment = "#>" ) + +library(ggplot2) +library(dplyr) +library(tidyr) +library(faux) +theme_set(theme_bw()) +set.seed(200) ``` -# faux + It is useful to be able to simulate data with a specified structure. The `faux` package provides some functions to make this process easier. 
-## Installation +## sim_design -You can install the released version of faux from -[GitHub](https://github.com/debruine/faux) with: +This function creates a dataset with a specific between- and within-subjects design. [see vignette](articles/sim_design.html) -``` r -devtools::install_github("debruine/faux") -``` +For example, the following creates a 2w*2b design with 100 observations in each cell. The between-subject factor is `pet` with two levels of `cat` and `dog`. The within-subject factor is `time` with two levels of `day` and `night`. The mean for the `cat_day` cell is 10, the mean for the `cat_night` cell is 20, the mean for the `dog_day` cell is 15, and the mean for the `dog_night` cell is 25. All cells have a SD of 5 and all within-subject cells are correlated r = 0.5. The resulting data has exactly these values (set `empirical = FALSE` to sample from a population with these values). -## Examples -```{r libraries, message=FALSE} -library(tidyverse) -library(faux) +```{r} +between <- list("pet" = c("cat", "dog")) +within <- list("time" = c("day", "night")) +mu <- data.frame( + cat = c(10, 20), + dog = c(15, 25), + row.names = within$time +) +df <- sim_design(within, between, + n = 100, mu = mu, sd = 5, r = .5, + empirical = TRUE) ``` +`r check_sim_stats(df, between = "pet", usekable = TRUE)` +Table: Sample `sim_design()` stats -### rnorm_multi +## rnorm_multi -This function makes multiple normally distributed vectors with specified parameters and relationships. - -For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where where A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. - -```{r set-seed, include=FALSE} -set.seed(200) -``` +This function makes multiple normally distributed vectors with specified parameters and relationships. 
[see vignette](articles/rnorm_multi.html) +For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. ```{r multirnorm-example} -dat <- rnorm_multi(n = 100, - cors = c(0.5, 0.5, 0.25), - mu = c(0, 20, 20), - sd = c(1, 5, 5), - varnames = c("A", "B", "C"), - empirical = FALSE) +dat <- rnorm_multi( + n = 100, + mu = c(0, 20, 20), + sd = c(1, 5, 5), + r = c(0.5, 0.5, 0.25), + varnames = c("A", "B", "C"), + empirical = FALSE +) ``` `r check_sim_stats(dat, usekable = T)` -Table: Sample stats +Table: Sample `rnorm_multi()` stats -#### Specify `cors` -You can specify the correlations in one of four ways: +## sim_df -* A single r for all pairs -* A vars by vars matrix -* A vars\*vars length vector -* A vars\*(vars-1)/2 length vector +This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). [see vignette](articles/sim_df.html) -##### One Number - -If you want all the pairs to have the same correlation, just specify a single number. +For example, the following code creates a new sample from the built-in dataset `iris` with 50 observations of each species. ```{r} -bvn <- rnorm_multi(100, 5, .3, varnames = letters[1:5]) +new_iris <- sim_df(iris, 50, "Species") ``` -`r check_sim_stats(bvn, usekable = T)` -Table: Sample stats from a single rho - -##### Matrix - -If you already have a correlation matrix, such as the output of `cor()`, you can specify the simulated data with that. 
- -```{r vvmatrix} -cmat <- cor(iris[,1:4]) -bvn <- rnorm_multi(100, 4, cmat, - varnames = colnames(cmat)) +```{r plot-iris-sim, echo = FALSE, fig.cap="Simulated iris dataset"} +new_iris %>% + ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + + geom_point() + + geom_smooth(method = "lm") ``` -`r check_sim_stats(bvn, usekable = T)` -Table: Sample stats from a correlation matrix +## Additional functions -##### Vector (vars\*vars) +### check_sim_stats -You can specify your correlation matrix by hand as a vars\*vars length vector, which will include the correlations of 1 down the diagonal. +If you want to check your simulated stats or just describe an existing dataset, use `check_sim_stats()`. ```{r} -cmat <- c(1, .3, .5, - .3, 1, 0, - .5, 0, 1) -bvn <- rnorm_multi(100, 3, cmat, - varnames = c("first", "second", "third")) +check_sim_stats(iris) ``` -`r check_sim_stats(bvn, usekable = T)` -Table: Sample stats from a vars\*vars vector - -##### Vector (vars\*(vars-1)/2) - -You can specify your correlation matrix by hand as a vars\*(vars-1)/2 length vector, skipping the diagonal and lower left duplicate values. +You can also group your data and change the digits to round. Display the table using `knitr::kable()` by setting `usekable` to `TRUE` (remember to set `results='asis'` in the chunk header). -```{r} -rho1_2 <- .3 -rho1_3 <- .5 -rho1_4 <- .5 -rho2_3 <- .2 -rho2_4 <- 0 -rho3_4 <- -.3 -cmat <- c(rho1_2, rho1_3, rho1_4, rho2_3, rho2_4, rho3_4) -bvn <- rnorm_multi(100, 4, cmat, - varnames = letters[1:4]) +```{r, results='asis'} +check_sim_stats(iris, + between = "Species", + digits = 3, + usekable = TRUE) ``` -`r check_sim_stats(bvn, usekable = T)` -Table: Sample stats from a (vars\*(vars-1)/2) vector - -#### empirical +### make_id -If you want your samples to have the *exact* correlations, means, and SDs you entered, set `empirical` to TRUE. 
+It is useful for IDs for random effects (e.g., subjects or stimuli) to be character strings (so you don't accidentally include them as fixed effects) with the same length (so you can sort them in order like S01, S02,..., S10 rather than S1, S10, S2, ...). This function returns a list of IDs that have the same string length and a specified prefix. ```{r} -bvn <- rnorm_multi(100, 5, .3, - varnames = letters[1:5], - empirical = T) +make_id(n = 10, prefix = "ITEM_") ``` -`r check_sim_stats(bvn, usekable = T)` -Table: Sample stats with empirical = TRUE - -### simdf +You can also manually set the number of digits and set `n` to a range of integers. -This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). - -For example, here is the relationship between speed and distance in the built-in dataset `cars`. - -```{r plot-cars-orig, fig.cap="Original cars dataset"} -cars %>% - ggplot(aes(speed, dist)) + - geom_point() + - geom_smooth(method = "lm") +```{r} +make_id(n = 10:20, digits = 3) ``` -You can create a new sample with the same parameters and 500 rows with the code `simdf(cars, 500)`. -```{r plot-cars-sim, fig.cap="Simulated cars dataset"} -simdf(cars, 500) %>% - ggplot(aes(speed, dist)) + - geom_point() + - geom_smooth(method = "lm") -``` - -#### Grouping Variables +### long2wide -You can also optionally add grouping variables. For example, here is the relationship between sepal length and width in the built-in dataset `iris`. 
+```{r} +between <- list("pet" = c("cat", "dog")) +within <- list("time" = c("day", "night")) +df_long <- sim_design(within, between, long = TRUE) -```{r plot-iris-orig, fig.cap="Original iris dataset"} -iris %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") +df_wide <- long2wide(df_long, + within = "time", + between = "pet", + dv = "val", + id = "sub_id") ``` -And here is a new sample with 50 observations of each species, made with the code `simdf(iris, 100, "Species")`. -```{r plot-iris-sim, fig.cap="Simulated iris dataset"} -simdf(iris, 50, "Species") %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") -``` -For now, the function only creates new variables sampled from a continuous normal distribution. I hope to add in other sampling distributions in the future. So you'd need to do any rounding or truncating yourself. - -```{r plot-iris-sim-round, fig.cap="Simulated iris dataset (rounded)"} -simdf(iris, 50, "Species") %>% - mutate_if(is.numeric, round, 1) %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") -``` ### pos_def_limits @@ -216,6 +168,8 @@ lims <- pos_def_limits(.8, .2, 0, `r knitr::kable(lims)` +### is_pos_def() + If you have a full matrix and want to know if it is positive definite, you can use the following code: ```{r is_pos_def} @@ -235,50 +189,7 @@ matrix(c(1, .3, -.9, .2, is_pos_def() ``` -### check_sim_stats - -If you want to check your simulated stats or just describe an existing dataset, use `check_sim_stats()`. - -```{r} -check_sim_stats(iris) -``` - -You can also group your data and change the digits to round. Display the table using `knitr::kable()` by setting `usekable` to `TRUE` (remember to set `results='asis'` in the chunk header. 
- -```{r, results='asis'} -check_sim_stats(iris, - grp_by = "Species", - digits = 3, - usekable = TRUE) -``` - - -### sim_design -Simulate data by specifying a design structure. -**This function is under development and should be carefully checked!** -```{r} -between <- list("pet" = c("cat", "dog")) -within <- list("time" = c("day", "night")) -mu <- data.frame( - cat = c(10, 20), - dog = c(15, 25), - row.names = within$time -) -df <- sim_design(within, between, n = 100, cors = 0.5, mu = mu, sd = 5) -``` - -```{r echo = FALSE, result='asis'} -check_sim_stats(df, grp_by = "pet", usekable = TRUE) -``` -```{r, echo = FALSE, fig.width = 8, fig.height = 4} -gather(df, time, val, day:night) %>% - ggplot(aes(pet, val, fill = pet)) + - geom_violin(show.legend = FALSE) + - geom_boxplot(width = .2, fill = "white", show.legend = FALSE) + - facet_grid(~time) + - scale_fill_manual(values = c("dodgerblue", "red")) -``` \ No newline at end of file diff --git a/README.md b/README.md index bba184d3..f068d489 100644 --- a/README.md +++ b/README.md @@ -1,227 +1,149 @@ -faux -==== + +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) It is useful to be able to simulate data with a specified structure. The `faux` package provides some functions to make this process easier. -Installation ------------- - -You can install the released version of faux from [GitHub](https://github.com/debruine/faux) with: +sim\_design +----------- -``` r -devtools::install_github("debruine/faux") -``` +This function creates a dataset with a specific between- and within-subjects design. [see vignette](articles/sim_design.html) -Examples --------- +For example, the following creates a 2w\*2b design with 100 observations in each cell. The between-subject factor is `pet` with two levels of `cat` and `dog`. The within-subject factor is `time` with two levels of `day` and `night`. 
The mean for the `cat_day` cell is 10, the mean for the `cat_night` cell is 20, the mean for the `dog_day` cell is 15, and the mean for the `dog_night` cell is 25. All cells have a SD of 5 and all within-subject cells are correlated r = 0.5. The resulting data has exactly these values (set `empirical = FALSE` to sample from a population with these values). ``` r -library(tidyverse) -library(faux) +between <- list("pet" = c("cat", "dog")) +within <- list("time" = c("day", "night")) +mu <- data.frame( + cat = c(10, 20), + dog = c(15, 25), + row.names = within$time +) +df <- sim_design(within, between, + n = 100, mu = mu, sd = 5, r = .5, + empirical = TRUE) ``` -### rnorm\_multi +| pet | n| var | day| night| mean| sd| +|:----|----:|:------|----:|------:|-----:|----:| +| cat | 100| day | 1.0| 0.5| 10| 5| +| cat | 100| night | 0.5| 1.0| 20| 5| +| dog | 100| day | 1.0| 0.5| 15| 5| +| dog | 100| night | 0.5| 1.0| 25| 5| + +rnorm\_multi +------------ -This function makes multiple normally distributed vectors with specified parameters and relationships. +This function makes multiple normally distributed vectors with specified parameters and relationships. [see vignette](articles/rnorm_multi.html) -For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where where A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. +For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. 
``` r -dat <- rnorm_multi(n = 100, - cors = c(0.5, 0.5, 0.25), - mu = c(0, 20, 20), - sd = c(1, 5, 5), - varnames = c("A", "B", "C"), - empirical = FALSE) +dat <- rnorm_multi( + n = 100, + mu = c(0, 20, 20), + sd = c(1, 5, 5), + r = c(0.5, 0.5, 0.25), + varnames = c("A", "B", "C"), + empirical = FALSE +) ``` | n| var | A| B| C| mean| sd| |----:|:----|-----:|-----:|-----:|------:|-----:| -| 100| A | 1.00| 0.45| 0.49| 0.03| 0.99| -| 100| B | 0.45| 1.00| 0.33| 20.01| 4.89| -| 100| C | 0.49| 0.33| 1.00| 19.76| 4.02| - -#### Specify `cors` - -You can specify the correlations in one of four ways: +| 100| A | 1.00| 0.62| 0.46| -0.05| 1.08| +| 100| B | 0.62| 1.00| 0.19| 19.95| 5.38| +| 100| C | 0.46| 0.19| 1.00| 19.81| 5.15| -- A single r for all pairs -- A vars by vars matrix -- A vars\*vars length vector -- A vars\*(vars-1)/2 length vector +sim\_df +------- -##### One Number +This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). [see vignette](articles/sim_df.html) -If you want all the pairs to have the same correlation, just specify a single number. +For example, the following code creates a new sample from the built-in dataset `iris` with 50 observations of each species. 
``` r -bvn <- rnorm_multi(100, 5, .3, varnames = letters[1:5]) +new_iris <- sim_df(iris, 50, "Species") ``` -| n| var | a| b| c| d| e| mean| sd| -|----:|:----|-----:|-----:|-----:|-----:|-----:|------:|-----:| -| 100| a | 1.00| 0.35| 0.22| 0.45| 0.37| -0.04| 1.09| -| 100| b | 0.35| 1.00| 0.19| 0.36| 0.28| -0.05| 0.83| -| 100| c | 0.22| 0.19| 1.00| 0.26| 0.20| 0.01| 1.08| -| 100| d | 0.45| 0.36| 0.26| 1.00| 0.24| 0.00| 1.00| -| 100| e | 0.37| 0.28| 0.20| 0.24| 1.00| 0.04| 0.97| +![Simulated iris dataset](README_files/figure-markdown_github/plot-iris-sim-1.png) -##### Matrix +Additional functions +-------------------- -If you already have a correlation matrix, such as the output of `cor()`, you can specify the simulated data with that. - -``` r -cmat <- cor(iris[,1:4]) -bvn <- rnorm_multi(100, 4, cmat, - varnames = colnames(cmat)) -``` - -| n| var | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width| mean| sd| -|----:|:-------------|-------------:|------------:|-------------:|------------:|------:|-----:| -| 100| Sepal.Length | 1.00| -0.10| 0.88| 0.83| -0.01| 1.05| -| 100| Sepal.Width | -0.10| 1.00| -0.38| -0.29| -0.19| 1.09| -| 100| Petal.Length | 0.88| -0.38| 1.00| 0.96| -0.01| 1.02| -| 100| Petal.Width | 0.83| -0.29| 0.96| 1.00| -0.05| 0.98| - -##### Vector (vars\*vars) - -You can specify your correlation matrix by hand as a vars\*vars length vector, which will include the correlations of 1 down the diagonal. 
- -``` r -cmat <- c(1, .3, .5, - .3, 1, 0, - .5, 0, 1) -bvn <- rnorm_multi(100, 3, cmat, - varnames = c("first", "second", "third")) -``` - -| n| var | first| second| third| mean| sd| -|----:|:-------|------:|-------:|------:|------:|-----:| -| 100| first | 1.00| 0.33| 0.45| -0.12| 1.01| -| 100| second | 0.33| 1.00| -0.04| -0.01| 1.04| -| 100| third | 0.45| -0.04| 1.00| -0.11| 1.00| - -##### Vector (vars\*(vars-1)/2) +### check\_sim\_stats -You can specify your correlation matrix by hand as a vars\*(vars-1)/2 length vector, skipping the diagonal and lower left duplicate values. +If you want to check your simulated stats or just describe an existing dataset, use `check_sim_stats()`. ``` r -rho1_2 <- .3 -rho1_3 <- .5 -rho1_4 <- .5 -rho2_3 <- .2 -rho2_4 <- 0 -rho3_4 <- -.3 -cmat <- c(rho1_2, rho1_3, rho1_4, rho2_3, rho2_4, rho3_4) -bvn <- rnorm_multi(100, 4, cmat, - varnames = letters[1:4]) +check_sim_stats(iris) +#> # A tibble: 4 x 8 +#> n var Sepal.Length Sepal.Width Petal.Length Petal.Width mean sd +#> +#> 1 150 Sepa… 1 -0.12 0.87 0.82 5.84 0.83 +#> 2 150 Sepa… -0.12 1 -0.43 -0.37 3.06 0.44 +#> 3 150 Peta… 0.87 -0.43 1 0.96 3.76 1.77 +#> 4 150 Peta… 0.82 -0.37 0.96 1 1.2 0.76 ``` -| n| var | a| b| c| d| mean| sd| -|----:|:----|-----:|-----:|------:|------:|------:|-----:| -| 100| a | 1.00| 0.35| 0.55| 0.50| -0.13| 1.01| -| 100| b | 0.35| 1.00| 0.16| 0.09| -0.10| 1.05| -| 100| c | 0.55| 0.16| 1.00| -0.21| -0.19| 0.91| -| 100| d | 0.50| 0.09| -0.21| 1.00| 0.12| 0.97| - -#### empirical - -If you want your samples to have the *exact* correlations, means, and SDs you entered, set `empirical` to TRUE. +You can also group your data and change the digits to round. Display the table using `knitr::kable()` by setting `usekable` to `TRUE` (remember to set `results='asis'` in the chunk header. 
``` r -bvn <- rnorm_multi(100, 5, .3, - varnames = letters[1:5], - empirical = T) +check_sim_stats(iris, + between = "Species", + digits = 3, + usekable = TRUE) ``` -| n| var | a| b| c| d| e| mean| sd| -|----:|:----|----:|----:|----:|----:|----:|-----:|----:| -| 100| a | 1.0| 0.3| 0.3| 0.3| 0.3| 0| 1| -| 100| b | 0.3| 1.0| 0.3| 0.3| 0.3| 0| 1| -| 100| c | 0.3| 0.3| 1.0| 0.3| 0.3| 0| 1| -| 100| d | 0.3| 0.3| 0.3| 1.0| 0.3| 0| 1| -| 100| e | 0.3| 0.3| 0.3| 0.3| 1.0| 0| 1| - -### simdf +| Species | n| var | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width| mean| sd| +|:-----------|----:|:-------------|-------------:|------------:|-------------:|------------:|------:|------:| +| setosa | 50| Sepal.Length | 1.000| 0.743| 0.267| 0.278| 5.006| 0.352| +| setosa | 50| Sepal.Width | 0.743| 1.000| 0.178| 0.233| 3.428| 0.379| +| setosa | 50| Petal.Length | 0.267| 0.178| 1.000| 0.332| 1.462| 0.174| +| setosa | 50| Petal.Width | 0.278| 0.233| 0.332| 1.000| 0.246| 0.105| +| versicolor | 50| Sepal.Length | 1.000| 0.526| 0.754| 0.546| 5.936| 0.516| +| versicolor | 50| Sepal.Width | 0.526| 1.000| 0.561| 0.664| 2.770| 0.314| +| versicolor | 50| Petal.Length | 0.754| 0.561| 1.000| 0.787| 4.260| 0.470| +| versicolor | 50| Petal.Width | 0.546| 0.664| 0.787| 1.000| 1.326| 0.198| +| virginica | 50| Sepal.Length | 1.000| 0.457| 0.864| 0.281| 6.588| 0.636| +| virginica | 50| Sepal.Width | 0.457| 1.000| 0.401| 0.538| 2.974| 0.322| +| virginica | 50| Petal.Length | 0.864| 0.401| 1.000| 0.322| 5.552| 0.552| +| virginica | 50| Petal.Width | 0.281| 0.538| 0.322| 1.000| 2.026| 0.275| -This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). +### make\_id -For example, here is the relationship between speed and distance in the built-in dataset `cars`. 
+It is useful for IDs for random effects (e.g., subjects or stimuli) to be character strings (so you don't accidentally include them as fixed effects) with the same length s(o you can sort them in order like S01, S02,..., S10 rather than S1, S10, S2, ...) This function returns a list of IDs that have the same string length and a specified prefix. ``` r -cars %>% - ggplot(aes(speed, dist)) + - geom_point() + - geom_smooth(method = "lm") +make_id(n = 10, prefix = "ITEM_") +#> [1] "ITEM_01" "ITEM_02" "ITEM_03" "ITEM_04" "ITEM_05" "ITEM_06" "ITEM_07" +#> [8] "ITEM_08" "ITEM_09" "ITEM_10" ``` -Original cars dataset -

-Original cars dataset -

- -You can create a new sample with the same parameters and 500 rows with the code `simdf(cars, 500)`. +You can also manually set the number of digits and set `n` to a range of integers. ``` r -simdf(cars, 500) %>% - ggplot(aes(speed, dist)) + - geom_point() + - geom_smooth(method = "lm") +make_id(n = 10:20, digits = 3) +#> [1] "S010" "S011" "S012" "S013" "S014" "S015" "S016" "S017" "S018" "S019" +#> [11] "S020" ``` -Simulated cars dataset -

-Simulated cars dataset -

- -#### Grouping Variables - -You can also optionally add grouping variables. For example, here is the relationship between sepal length and width in the built-in dataset `iris`. +### long2wide ``` r -iris %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") -``` - -Original iris dataset -

-Original iris dataset -

- -And here is a new sample with 50 observations of each species, made with the code `simdf(iris, 100, "Species")`. - -``` r -simdf(iris, 50, "Species") %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") -``` - -Simulated iris dataset -

-Simulated iris dataset -

- -For now, the function only creates new variables sampled from a continuous normal distribution. I hope to add in other sampling distributions in the future. So you'd need to do any rounding or truncating yourself. +between <- list("pet" = c("cat", "dog")) +within <- list("time" = c("day", "night")) +df_long <- sim_design(within, between, long = TRUE) -``` r -simdf(iris, 50, "Species") %>% - mutate_if(is.numeric, round, 1) %>% - ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) + - geom_point() + - geom_smooth(method = "lm") +df_wide <- long2wide(df_long, + within = "time", + between = "pet", + dv = "val", + id = "sub_id") ``` -Simulated iris dataset (rounded) -

-Simulated iris dataset (rounded) -

- ### pos\_def\_limits Not all correlation matrices are possible. For example, if variables A and B are correlated with r = 1.0, then the correlation between A and C can only be exactly equal to the correlation between B and C. @@ -250,6 +172,8 @@ lims <- pos_def_limits(.8, .2, 0, |:----|:----| | NA | NA | +### is\_pos\_def() + If you have a full matrix and want to know if it is positive definite, you can use the following code: ``` r @@ -270,68 +194,3 @@ matrix(c(1, .3, -.9, .2, is_pos_def() #> [1] FALSE ``` - -### check\_sim\_stats - -If you want to check your simulated stats or just describe an existing dataset, use `check_sim_stats()`. - -``` r -check_sim_stats(iris) -#> # A tibble: 4 x 8 -#> n var Sepal.Length Sepal.Width Petal.Length Petal.Width mean sd -#> -#> 1 150 Sepa… 1 -0.12 0.87 0.82 5.84 0.83 -#> 2 150 Sepa… -0.12 1 -0.43 -0.37 3.06 0.44 -#> 3 150 Peta… 0.87 -0.43 1 0.96 3.76 1.77 -#> 4 150 Peta… 0.82 -0.37 0.96 1 1.2 0.76 -``` - -You can also group your data and change the digits to round. Display the table using `knitr::kable()` by setting `usekable` to `TRUE` (remember to set `results='asis'` in the chunk header. 
- -``` r -check_sim_stats(iris, - grp_by = "Species", - digits = 3, - usekable = TRUE) -``` - -| Species | n| var | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width| mean| sd| -|:-----------|----:|:-------------|-------------:|------------:|-------------:|------------:|------:|------:| -| setosa | 50| Sepal.Length | 1.000| 0.743| 0.267| 0.278| 5.006| 0.352| -| setosa | 50| Sepal.Width | 0.743| 1.000| 0.178| 0.233| 3.428| 0.379| -| setosa | 50| Petal.Length | 0.267| 0.178| 1.000| 0.332| 1.462| 0.174| -| setosa | 50| Petal.Width | 0.278| 0.233| 0.332| 1.000| 0.246| 0.105| -| versicolor | 50| Sepal.Length | 1.000| 0.526| 0.754| 0.546| 5.936| 0.516| -| versicolor | 50| Sepal.Width | 0.526| 1.000| 0.561| 0.664| 2.770| 0.314| -| versicolor | 50| Petal.Length | 0.754| 0.561| 1.000| 0.787| 4.260| 0.470| -| versicolor | 50| Petal.Width | 0.546| 0.664| 0.787| 1.000| 1.326| 0.198| -| virginica | 50| Sepal.Length | 1.000| 0.457| 0.864| 0.281| 6.588| 0.636| -| virginica | 50| Sepal.Width | 0.457| 1.000| 0.401| 0.538| 2.974| 0.322| -| virginica | 50| Petal.Length | 0.864| 0.401| 1.000| 0.322| 5.552| 0.552| -| virginica | 50| Petal.Width | 0.281| 0.538| 0.322| 1.000| 2.026| 0.275| - -### sim\_design - -Simulate data by specifying a design structure. 
- -**This function is under development and should be carefully checked!** - -``` r -between <- list("pet" = c("cat", "dog")) -within <- list("time" = c("day", "night")) -mu <- data.frame( - cat = c(10, 20), - dog = c(15, 25), - row.names = within$time -) -df <- sim_design(within, between, n = 100, cors = 0.5, mu = mu, sd = 5) -``` - -| pet | n| var | day| night| mean| sd| -|:----|----:|:------|-----:|------:|------:|-----:| -| cat | 100| day | 1.00| 0.55| 10.39| 4.69| -| cat | 100| night | 0.55| 1.00| 19.78| 4.67| -| dog | 100| day | 1.00| 0.46| 15.02| 4.18| -| dog | 100| night | 0.46| 1.00| 24.79| 4.74| - - diff --git a/README_files/figure-markdown_github/plot-iris-sim-1.png b/README_files/figure-markdown_github/plot-iris-sim-1.png new file mode 100644 index 00000000..819df6be Binary files /dev/null and b/README_files/figure-markdown_github/plot-iris-sim-1.png differ diff --git a/data-raw/speed_tests.R b/data-raw/speed_tests.R index 4dd0fd85..cfc6dcd5 100644 --- a/data-raw/speed_tests.R +++ b/data-raw/speed_tests.R @@ -1,35 +1,60 @@ +library(faux) +library(tidyverse) + + # speed test ---- -library(tidyverse) + within <- list( - "A" = c("A1", "A2") + "W" = c("W1", "W2"), + "X" = c("X1", "X2") ) -between <- list( - "B" = c("B1", "B2") -) +between <- list() + +mu <- c(W1_X1 = 10, W1_X2 = 12, W2_X1 = 10, W2_X2 = 10) -mu <- list( - "B1" = c(10, 10), - "B2" = c(10, 10) -) -func <- function(i) { - utils::setTxtProgressBar(pb, i) - df <- sim_design(within, between, n = 20, mu = mu, frame_long = TRUE) - suppressMessages( - anova <- afex::aov_4(val~B*(A|sub_id), data = df, check_contrasts = TRUE) - ) - anova$anova_table %>% - tibble::as_tibble(rownames = "factor") +anova_func <- function(i, v = "afex") { + #utils::setTxtProgressBar(pb, i) + df <- sim_design(within, between, n = 20, mu = mu, sd = 4, frame_long = TRUE) + + if (v == "afex") { + afex::aov_4(val~(X*W|sub_id), data = df, return = "aov") %>% + broom::tidy() + } else if (v == "aov") { + 
aov(val~(X*W)+Error(sub_id/(X*W)), data = df, contrasts = NULL) %>% + broom::tidy() + } } -reps <- 2 -pb <- txtProgressBar(max = reps) -timestamp() -sims <- purrr::map_df(1:reps, func) -timestamp() -close(pb) +anova_func2 <- function(i) { + df <- sim_design(within, between, n = 20, mu = mu, sd = 4, frame_long = TRUE) + + aov(val~(X*W)+Error(sub_id/(X*W)), data = df, contrasts = NULL) %>% + broom::tidy() +} + +reps <- 10000 +#pb <- utils::txtProgressBar(max = reps) +system.time( + sims_afex <- purrr::map_df(1:reps, anova_func, v = "afex") +) +system.time( + sims_aov <- purrr::map_df(1:reps, anova_func, v = "aov") +) +#close(pb) + +sims_afex %>% + filter(term != "Residuals") %>% + group_by(term) %>% + summarise(power = mean(p.value < .05)) + +sims_aov %>% + filter(term != "Residuals") %>% + group_by(term) %>% + summarise(power = mean(p.value < .05)) + alpha <- 0.05 @@ -48,3 +73,7 @@ sims %>% # df <- purrr::map(1:1e4, ~sim_design(within, between, n = 20)) ##------ Sun Apr 28 20:44:48 2019 ------## +##------ Mon Apr 29 16:59:53 2019 ------## +# > sims <- purrr::map_df(1:1e4, anova_func) +##------ Mon Apr 29 17:02:51 2019 ------## + diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 8319144c..e30bd682 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -60,7 +60,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -82,9 +82,6 @@ diff --git a/docs/articles/index.html b/docs/articles/index.html index 1b2dde89..edd0368b 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -60,7 +60,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -82,9 +82,6 @@ @@ -123,10 +120,9 @@

All vignettes

diff --git a/docs/articles/intro.html b/docs/articles/intro.html index 4dbbf0d0..5a4af1c5 100644 --- a/docs/articles/intro.html +++ b/docs/articles/intro.html @@ -62,7 +62,7 @@ Simulate by Design
  • - Simulate from Existing Data + Simulate from Existing Data
  • @@ -85,7 +85,7 @@

    Introduction to faux

    Lisa DeBruine

    -

    2019-04-29

    +

    2019-05-01

    @@ -217,12 +217,12 @@

    -
    +

    -simdf

    -

    This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). see vignette

    +sim_df

    +

    This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). see vignette

    For example, the following code creates a new sample from the built-in dataset iris with 50 observations of each species.

    -
    new_iris <- simdf(iris, 50, "Species") 
    +
    new_iris <- sim_df(iris, 50, "Species") 
    Simulated iris dataset

    Simulated iris dataset @@ -246,7 +246,7 @@

    #> 4 150 Peta… 0.82 -0.37 0.96 1 1.2 0.76

    You can also group your data and change the digits to round. Display the table using knitr::kable() by setting usekable to TRUE (remember to set results='asis' in the chunk header).

    check_sim_stats(iris, 
    -                grp_by = "Species", 
    +                between = "Species", 
                     digits = 3, 
                     usekable = TRUE)
    @@ -397,6 +397,14 @@

    +
    +

    +make_id

    +

    It is useful for IDs for random effects (e.g., subjects or stimuli) to be character strings (so you don’t accidentally include them as fixed effects) with the same length (so you can sort them in order like S01, S02,…, S10 rather than S1, S10, S2, …). This function returns a list of IDs that have the same string length and a specified prefix.

    +
    make_id(n = 10, prefix = "ITEM_")
    +#>  [1] "ITEM_01" "ITEM_02" "ITEM_03" "ITEM_04" "ITEM_05" "ITEM_06" "ITEM_07"
    +#>  [8] "ITEM_08" "ITEM_09" "ITEM_10"
    +

    pos_def_limits

    @@ -428,6 +436,10 @@

    NA +

    +
    +

    +is_pos_def()

    If you have a full matrix and want to know if it is positive definite, you can use the following code:

    c(.2, .3, .4, .2,
           .3, -.1, .2,
    @@ -453,7 +465,7 @@ 

    diff --git a/docs/articles/rnorm_multi.html b/docs/articles/rnorm_multi.html index 7e30208c..ee3f4a82 100644 --- a/docs/articles/rnorm_multi.html +++ b/docs/articles/rnorm_multi.html @@ -30,7 +30,7 @@ faux - 0.0.0.9005 + 0.0.0.9006
    @@ -53,16 +53,13 @@ @@ -85,7 +82,7 @@

    Simulate Correlated Variables

    Lisa DeBruine

    -

    2019-04-29

    +

    2019-05-03

    @@ -98,12 +95,12 @@

    2019-04-29

    Quick example

    -

    For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where where A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5.

    +

    For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25.

    
     dat <- rnorm_multi(n = 100, 
    -                  cors = c(0.5, 0.5, 0.25), 
                       mu = c(0, 20, 20),
                       sd = c(1, 5, 5),
    +                  r = c(0.5, 0.5, 0.25), 
                       varnames = c("A", "B", "C"),
                       empirical = FALSE)
    @@ -147,10 +144,9 @@

    -
    +

    -Specify cors -

    +Specify correlations

    You can specify the correlations in one of four ways:

    • A single r for all pairs
    • @@ -162,7 +158,7 @@

      One Number

      If you want all the pairs to have the same correlation, just specify a single number.

      -
      bvn <- rnorm_multi(100, 5, .3, varnames = letters[1:5])
      +
      bvn <- rnorm_multi(100, 5, 0, 1, .3, varnames = letters[1:5])
      @@ -240,7 +236,7 @@

      Matrix

      If you already have a correlation matrix, such as the output of cor(), you can specify the simulated data with that.

      cmat <- cor(iris[,1:4])
      -bvn <- rnorm_multi(100, 4, cmat, 
      +bvn <- rnorm_multi(100, 4, 0, 1, cmat, 
                         varnames = colnames(cmat))
      Sample stats from a single rho
      @@ -305,7 +301,7 @@

      cmat <- c(1, .3, .5,
                 .3, 1, 0,
                 .5, 0, 1)
      -bvn <- rnorm_multi(100, 3, cmat, 
      +bvn <- rnorm_multi(100, 3, 0, 1, cmat, 
                         varnames = c("first", "second", "third"))

      Sample stats from a correlation matrix
      @@ -360,7 +356,7 @@

      rho2_4 <- 0 rho3_4 <- -.3 cmat <- c(rho1_2, rho1_3, rho1_4, rho2_3, rho2_4, rho3_4) -bvn <- rnorm_multi(100, 4, cmat, +bvn <- rnorm_multi(100, 4, 0, 1, cmat, varnames = letters[1:4])

      Sample stats from a vars*vars vector
      @@ -423,7 +419,7 @@

      empirical

      If you want your samples to have the exact correlations, means, and SDs you entered, set empirical to TRUE.

      -
      bvn <- rnorm_multi(100, 5, .3, 
      +
      bvn <- rnorm_multi(100, 5, 0, 1, .3, 
                         varnames = letters[1:5], 
                         empirical = T)
      Sample stats from a (vars*(vars-1)/2) vector
      @@ -498,6 +494,43 @@

    +
    +
    +

    +Pre-existing variable

    +

    Use rnorm_pre() to create a vector with a specified correlation to a pre-existing variable. The following code creates a vector called sl.5 with a mean of 10, SD of 2 and a correlation of r = 0.5 to the Sepal.Length column in the built-in dataset iris. This function only creates a vector with the exact parameters specified.

    +
    sl <- iris$Sepal.Length
    +
    +sl.5.v1 <- rnorm_pre(sl, mu = 10, sd = 2, r = 0.5)
    +sl.5.v2 <- rnorm_pre(sl, mu = 10, sd = 2, r = 0.5)
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    slsl.5.v1sl.5.v2
    sl1.00.50000000.5000000
    sl.5.v10.51.00000000.1765035
    sl.5.v20.50.17650351.0000000
    @@ -507,6 +540,7 @@

    Contents

    diff --git a/docs/articles/sim_design.html b/docs/articles/sim_design.html index 2f1a48f0..fe2d0404 100644 --- a/docs/articles/sim_design.html +++ b/docs/articles/sim_design.html @@ -30,7 +30,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -53,16 +53,13 @@ @@ -85,7 +82,7 @@

    Simulate by Design

    Lisa DeBruine

    -

    2019-04-29

    +

    2019-05-03

    @@ -106,7 +103,7 @@

    dog = c(15, 25), row.names = within$time ) -df <- sim_design(within, between, n = 100, cors = 0.5, mu = mu, sd = 5) +df <- sim_design(within, between, n = 100, mu = mu, sd = 5, r = 0.5, empirical = TRUE) @@ -122,37 +119,37 @@

    - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
    petcat 100 day1.000.5610.234.121.00.5105
    cat 100 night0.561.0020.024.880.51.0205
    dog 100 day1.000.5515.275.001.00.5155
    dog 100 night0.551.0025.315.360.51.0255
    @@ -164,26 +161,142 @@

    Factor and level names

    -

    First, list your between-subject and within-subject factors. You can specify them like this:

    +

    If you don’t feel like naming your factors and levels, you can just put in a vector of levels. So you can make a quick 2w*3w*2b with the following code.

    +
    df <- sim_design(within = c(2,3), between = c(2), n = 5, mu = 0, sd = 1, r = 0.5)
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    sub_idCA1_B1A2_B1A1_B2A2_B2A1_B3A2_B3
    S01C1-2.7038494-2.0031140-1.24636190.2395364-0.3551418-0.6613917
    S02C1-0.2574224-2.5189879-0.0945284-1.0963883-0.9231602-0.8312929
    S03C10.93269600.57413980.94616071.51748110.13368172.2414371
    S04C1-0.2846845-0.0003843-0.42058920.14290450.7309652-0.4047417
    S05C10.02619540.5607634-1.2736072-1.01079321.33379950.2854269
    S06C2-0.9116783-1.0939985-1.0655517-0.9173210-1.4847558-2.5493564
    S07C2-0.06900600.47418050.3974194-0.18882210.09631501.4528341
    S08C2-0.45911010.8723573-0.2708937-0.62128911.34803800.9535270
    S09C20.1287775-0.3451130-0.5767718-1.8759247-0.8973256-1.1960685
    S10C21.06233201.0175079-0.16751000.56205790.75133210.7814270
    +

    You can specify between-subject and within-subject factors as a list of vectors where the item names are the factor labels and the vectors are the level labels.

    between <- list(
       "pet" = c("cat", "dog")
     )
     within <- list(
       "time" = c("day", "night")
     )
    -

    Or like this:

    +

    You can also specify factors as a list of named vectors where the item names are the factor labels, the vector names are the level labels, and the vector items are the long labels you would use in a codebook or plot.

    between <- list(
       pet = c(cat = "Is a cat person", dog = "Is a dog person")
     )
     within <- list(
       time = c(day = "Tested during the day", night = "Tested at night")
     )
    -

    The long name for factor levels isn’t used yet, but will be used in future functions that create a codebook or plots.

    +

    The long label for factor levels isn’t used yet, but will be used in future functions that create a codebook or plots.

    N, mean, SD

    -

    You can specify the Ns, means and standard deviations for each cell as a single number, vector, list, or data frame.

    +

    You can specify the Ns, means and standard deviations for each cell as a single number, list of vectors, or data frame.

    You usually want to specify n as a single number. This is N per cell, not total sample size.

    n <- 100 # n per cell, not total

    The levels of the between-subject factors are the list names and the levels of the within-subject factors are the vector names.

    @@ -221,13 +334,13 @@

    Correlations

    If you have any within-subject factors, you need to set the correlation for each between-cell. Here, we only have two levels of one within-subject factor, so can only set one correlation per between-cell.

    -
    cors <- list(
    +
    r <- list(
       cat = .5,
       dog = .6
     )

    If you set empirical = TRUE, you will get the exact means, SDs and correlations you specified. If you set empirical = FALSE or omit that argument, your data will be sampled from a population with those parameters, but your dataset will not have exactly those values (just on average).

    df <- sim_design(within, between, n = 100, 
    -                 cors = cors, mu = mu, sd = sd,
    +                 mu = mu, sd = sd, r = r,
                      empirical = TRUE)
    @@ -304,32 +417,34 @@

    expert_night = c(10, 15, 12, 17, 14, 19) )

    You can set the correlation for each between-cell to a single number.

    -
    cors <- list(
    +
    r <- list(
       novice_day = 0.3,
       novice_night = 0.2,
       expert_day = 0.5,
       expert_night = 0.4
     )
    -

    Or you can set the full correlation matrix with a vector or matrix (see section Specify cors). Since we have 6 within-cells, this is a 6x6 matrix or a vector of the upper right 15 values.

    +

    Or you can set the full correlation matrix with a vector or matrix. Since we have 6 within-cells, this is a 6x6 matrix or a vector of the upper right 15 values.

    # upper right triangle correlation specification
     # inc and con have r = 0.5 within each difficulty level, 0.2 otherwise
    -#   ce,  ie,  cm,  im,  ch,  ih
    -r <-  c(0.5, 0.2, 0.2, 0.2, 0.2, #con_easy
    -             0.2, 0.2, 0.2, 0.2, #inc_easy
    -                  0.5, 0.2, 0.2, #con_med
    -                       0.2, 0.2, #inc_med
    -                            0.5) #con_hard
    -                                 #inc_hard
    +#          ce,  ie,  cm,  im,  ch,  ih
    +triangle <-  c(0.5, 0.2, 0.2, 0.2, 0.2, #con_easy
    +                    0.2, 0.2, 0.2, 0.2, #inc_easy
    +                         0.5, 0.2, 0.2, #con_med
    +                              0.2, 0.2, #inc_med
    +                                   0.5) #con_hard
    +                                        #inc_hard
     
    -cors <- list(
    -  novice_day = r,
    -  novice_night = r,
    -  expert_day = r,
    -  expert_night = r
    +r <- list(
    +  novice_day = triangle,
    +  novice_night = triangle,
    +  expert_day = triangle,
    +  expert_night = triangle
     )
    -

    You can set frame_long = TRUE to return the data frame in long format, which is usually easier for plotting.

    -
    df <- sim_design(within, between, n = 100, cors = cors, mu = mu, sd = 2, frame_long = TRUE)
    -

    +

    You can set long = TRUE to return the data frame in long format, which is usually easier for plotting.

    +
    df <- sim_design(within, between, n = 100, 
    +                 mu = mu, sd = 2, r = r, 
    +                 long = TRUE)
    +

    diff --git a/docs/articles/sim_design_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/sim_design_files/figure-html/unnamed-chunk-17-1.png new file mode 100644 index 00000000..0082dcfe Binary files /dev/null and b/docs/articles/sim_design_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/sim_design_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/sim_design_files/figure-html/unnamed-chunk-3-1.png index 24ce9dbd..e4bb4a24 100644 Binary files a/docs/articles/sim_design_files/figure-html/unnamed-chunk-3-1.png and b/docs/articles/sim_design_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/sim_df.html b/docs/articles/sim_df.html new file mode 100644 index 00000000..b970f643 --- /dev/null +++ b/docs/articles/sim_df.html @@ -0,0 +1,190 @@ + + + + + + + +Simulate from Existing Data • faux + + + + + + + + + +
    +
    + + + +
    +
    + + + + +

    The sim_df() function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now).

    +

    For example, here is the relationship between speed and distance in the built-in dataset cars.

    +
    cars %>%
    +  ggplot(aes(speed, dist)) + 
    +  geom_point() +
    +  geom_smooth(method = "lm")
    +
    +Original cars dataset

    +Original cars dataset +

    +
    +

    You can create a new sample with the same parameters and 500 rows with the code sim_df(cars, 500).

    +
    sim_df(cars, 500) %>%
    +  ggplot(aes(speed, dist)) + 
    +    geom_point() +
    +    geom_smooth(method = "lm")
    +
    +Simulated cars dataset

    +Simulated cars dataset +

    +
    +
    +

    +Between-subject variables

    +

    You can also optionally add between-subject variables. For example, here is the relationship between sepal length and width in the built-in dataset iris.

    +
    iris %>%
    +  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    +  geom_point() +
    +  geom_smooth(method = "lm")
    +
    +Original iris dataset

    +Original iris dataset +

    +
    +

    And here is a new sample with 50 observations of each species, made with the code sim_df(iris, 50, "Species").

    +
    sim_df(iris, 50, "Species") %>%
    +  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    +  geom_point() +
    +  geom_smooth(method = "lm")
    +
    +Simulated iris dataset

    +Simulated iris dataset +

    +
    +
    +
    +

    +Empirical

    +

    Set empirical = TRUE to return a data frame with exactly the same means, SDs, and correlations as the original dataset.

    +
    exact_iris <- sim_df(iris, 50, between = "Species", empirical = TRUE)
    +
    +
    +

    +Rounding

    +

    For now, the function only creates new variables sampled from a continuous normal distribution. I hope to add in other sampling distributions in the future. So you’d need to do any rounding or truncating yourself.

    +
    sim_df(iris, 50, "Species") %>%
    +  mutate_if(is.numeric, round, 1) %>%
    +  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    +  geom_point() +
    +  geom_smooth(method = "lm")
    +
    +Simulated iris dataset (rounded)

    +Simulated iris dataset (rounded) +

    +
    +
    +
    + + + +
    + + +
    + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    +
    + + + + + diff --git a/docs/articles/sim_df_files/figure-html/plot-cars-orig-1.png b/docs/articles/sim_df_files/figure-html/plot-cars-orig-1.png new file mode 100644 index 00000000..6d1b7c08 Binary files /dev/null and b/docs/articles/sim_df_files/figure-html/plot-cars-orig-1.png differ diff --git a/docs/articles/sim_df_files/figure-html/plot-cars-sim-1.png b/docs/articles/sim_df_files/figure-html/plot-cars-sim-1.png new file mode 100644 index 00000000..7da148cf Binary files /dev/null and b/docs/articles/sim_df_files/figure-html/plot-cars-sim-1.png differ diff --git a/docs/articles/sim_df_files/figure-html/plot-iris-orig-1.png b/docs/articles/sim_df_files/figure-html/plot-iris-orig-1.png new file mode 100644 index 00000000..66379512 Binary files /dev/null and b/docs/articles/sim_df_files/figure-html/plot-iris-orig-1.png differ diff --git a/docs/articles/sim_df_files/figure-html/plot-iris-sim-1.png b/docs/articles/sim_df_files/figure-html/plot-iris-sim-1.png new file mode 100644 index 00000000..3f885c6f Binary files /dev/null and b/docs/articles/sim_df_files/figure-html/plot-iris-sim-1.png differ diff --git a/docs/articles/sim_df_files/figure-html/plot-iris-sim-round-1.png b/docs/articles/sim_df_files/figure-html/plot-iris-sim-round-1.png new file mode 100644 index 00000000..09ea4737 Binary files /dev/null and b/docs/articles/sim_df_files/figure-html/plot-iris-sim-round-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 918f2b40..1c314be5 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -60,7 +60,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -82,9 +82,6 @@ @@ -120,7 +117,7 @@

    Authors

    • -

      Lisa DeBruine. Author, maintainer. +

      Lisa DeBruine. Author, maintainer. ORCID

    diff --git a/docs/index.html b/docs/index.html index d494478a..6c9528ff 100644 --- a/docs/index.html +++ b/docs/index.html @@ -30,7 +30,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -53,16 +53,13 @@ @@ -86,532 +83,146 @@ -
    - + +

    It is useful to be able to simulate data with a specified structure. The faux package provides some functions to make this process easier.

    -
    +

    -Installation

    -

    You can install the released version of faux from GitHub with:

    -
    devtools::install_github("debruine/faux")
    -
    -
    -

    -Examples

    -
    library(tidyverse)
    -library(faux)
    -
    -

    -rnorm_multi

    -

    This function makes multiple normally distributed vectors with specified parameters and relationships.

    -

    For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where where A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25. A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5.

    -
    
    -dat <- rnorm_multi(n = 100, 
    -                  cors = c(0.5, 0.5, 0.25), 
    -                  mu = c(0, 20, 20),
    -                  sd = c(1, 5, 5),
    -                  varnames = c("A", "B", "C"),
    -                  empirical = FALSE)
    -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Sample stats
    nvarABCmeansd
    100A1.000.450.490.030.99
    100B0.451.000.3320.014.89
    100C0.490.331.0019.764.02
    -
    -

    -Specify cors -

    -

    You can specify the correlations in one of four ways:

    -
      -
    • A single r for all pairs
    • -
    • A vars by vars matrix
    • -
    • A vars*vars length vector
    • -
    • A vars*(vars-1)/2 length vector
    • -
    -
    -
    -One Number
    -

    If you want all the pairs to have the same correlation, just specify a single number.

    -
    bvn <- rnorm_multi(100, 5, .3, varnames = letters[1:5])
    +sim_design

    +

    This function creates a dataset with a specific between- and within-subjects design. see vignette

    +

    For example, the following creates a 2w*2b design with 100 observations in each cell. The between-subject factor is pet with two levels of cat and dog. The within-subject factor is time with two levels of day and night. The mean for the cat_day cell is 10, the mean for the cat_night cell is 20, the mean for the dog_day cell is 15, and the mean for the dog_night cell is 25. All cells have a SD of 5 and all within-subject cells are correlated r = 0.5. The resulting data has exactly these values (set empirical = FALSE to sample from a population with these values).

    +
    between <- list("pet" = c("cat", "dog"))
    +within <- list("time" = c("day", "night"))
    +mu <- data.frame(
    +  cat = c(10, 20),
    +  dog = c(15, 25),
    +  row.names = within$time
    +)
    +df <- sim_design(within, between, 
    +                 n = 100, mu = mu, sd = 5, r = .5,
    +                 empirical = TRUE)
    - + + - - - - - + + + - - - - - - - - + + + + + + - - - - - - - - + + + + + + - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - + + + + +
    Sample stats from a single rhoSample sim_design() stats
    pet n varabcdedaynight mean sd
    cat 100a1.000.350.220.450.37-0.041.09day1.00.5105
    cat 100b0.351.000.190.360.28-0.050.83night0.51.0205
    dog 100c0.220.191.000.260.200.011.08day1.00.5155
    dog 100d0.450.360.261.000.240.001.00
    100e0.370.280.200.241.000.040.97night0.51.0255
    -
    -
    -Matrix
    -

    If you already have a correlation matrix, such as the output of cor(), you can specify the simulated data with that.

    -
    cmat <- cor(iris[,1:4])
    -bvn <- rnorm_multi(100, 4, cmat, 
    -                  varnames = colnames(cmat))
    +
    +

    +rnorm_multi

    +

    This function makes multiple normally distributed vectors with specified parameters and relationships. see vignette

    +

    For example, the following creates a sample that has 100 observations of 3 variables, drawn from a population where A has a mean of 0 and SD of 1, while B and C have means of 20 and SDs of 5. A correlates with B and C with r = 0.5, and B and C correlate with r = 0.25.

    +
    
    +dat <- rnorm_multi(
    +  n = 100, 
    +  mu = c(0, 20, 20),
    +  sd = c(1, 5, 5),
    +  r = c(0.5, 0.5, 0.25), 
    +  varnames = c("A", "B", "C"),
    +  empirical = FALSE
    +)
    - + - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - -
    Sample stats from a correlation matrixSample rnorm_multi() stats
    n varSepal.LengthSepal.WidthPetal.LengthPetal.WidthABC mean sd
    100Sepal.Length1.00-0.100.880.83-0.011.05
    100Sepal.Width-0.101.00-0.38-0.29-0.191.09
    100Petal.Length0.88-0.381.000.96-0.011.02
    100Petal.Width0.83-0.290.96A 1.000.620.46 -0.050.98
    -
    -
    -
    -Vector (vars*vars)
    -

    You can specify your correlation matrix by hand as a vars*vars length vector, which will include the correlations of 1 down the diagonal.

    -
    cmat <- c(1, .3, .5,
    -          .3, 1, 0,
    -          .5, 0, 1)
    -bvn <- rnorm_multi(100, 3, cmat, 
    -                  varnames = c("first", "second", "third"))
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Sample stats from a vars*vars vector
    nvarfirstsecondthirdmeansd
    100first1.000.330.45-0.121.01
    100second0.331.00-0.04-0.011.04
    100third0.45-0.041.00-0.111.00
    -
    -
    -
    -Vector (vars*(vars-1)/2)
    -

    You can specify your correlation matrix by hand as a vars*(vars-1)/2 length vector, skipping the diagonal and lower left duplicate values.

    -
    rho1_2 <- .3
    -rho1_3 <- .5
    -rho1_4 <- .5
    -rho2_3 <- .2
    -rho2_4 <- 0
    -rho3_4 <- -.3
    -cmat <- c(rho1_2, rho1_3, rho1_4, rho2_3, rho2_4, rho3_4)
    -bvn <- rnorm_multi(100, 4, cmat, 
    -                  varnames = letters[1:4])
    - - - - - - - - - - - - - - - - - - - - - - + - - + + - - - - + + + - - - - - - - - - - - - - - + + + - - - - -
    Sample stats from a (vars*(vars-1)/2) vector
    nvarabcdmeansd
    100a1.000.350.550.50-0.131.011.08
    100b0.35B0.62 1.000.160.09-0.101.050.1919.955.38
    100c0.550.161.00-0.21-0.190.91
    100d0.500.09-0.21C0.460.19 1.000.120.97
    -
    -
    -
    -

    -empirical

    -

    If you want your samples to have the exact correlations, means, and SDs you entered, set empirical to TRUE.

    -
    bvn <- rnorm_multi(100, 5, .3, 
    -                  varnames = letters[1:5], 
    -                  empirical = T)
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +
    Sample stats with empirical = TRUE
    nvarabcdemeansd
    100a1.00.30.30.30.301
    100b0.31.00.30.30.301
    100c0.30.31.00.30.301
    100d0.30.30.31.00.301
    100e0.30.30.30.31.00119.815.15
    - -
    -

    -simdf

    -

    This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now).

    -

    For example, here is the relationship between speed and distance in the built-in dataset cars.

    -
    cars %>%
    -  ggplot(aes(speed, dist)) + 
    -  geom_point() +
    -  geom_smooth(method = "lm")
    -
    -Original cars dataset

    -Original cars dataset -

    -
    -

    You can create a new sample with the same parameters and 500 rows with the code simdf(cars, 500).

    -
    simdf(cars, 500) %>%
    -  ggplot(aes(speed, dist)) + 
    -    geom_point() +
    -    geom_smooth(method = "lm")
    -
    -Simulated cars dataset

    -Simulated cars dataset -

    -
    -
    -

    -Grouping Variables

    -

    You can also optionally add grouping variables. For example, here is the relationship between sepal length and width in the built-in dataset iris.

    -
    iris %>%
    -  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    -  geom_point() +
    -  geom_smooth(method = "lm")
    -
    -Original iris dataset

    -Original iris dataset -

    -
    -

    And here is a new sample with 50 observations of each species, made with the code simdf(iris, 100, "Species").

    -
    simdf(iris, 50, "Species") %>%
    -  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    -  geom_point() +
    -  geom_smooth(method = "lm")
    +
    +

    +sim_df

    +

    This function produces a dataframe with the same distributions and correlations as an existing dataframe. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now). see vignette

    +

    For example, the following code creates a new sample from the built-in dataset iris with 50 observations of each species.

    +
    new_iris <- sim_df(iris, 50, "Species") 
    -Simulated iris dataset

    +Simulated iris dataset

    Simulated iris dataset

    -

    For now, the function only creates new variables sampled from a continuous normal distribution. I hope to add in other sampling distributions in the future. So you’d need to do any rounding or truncating yourself.

    -
    simdf(iris, 50, "Species") %>%
    -  mutate_if(is.numeric, round, 1) %>%
    -  ggplot(aes(Sepal.Width, Sepal.Length, color = Species)) +
    -  geom_point() +
    -  geom_smooth(method = "lm")
    -
    -Simulated iris dataset (rounded)

    -Simulated iris dataset (rounded) -

    -
    -
    -
    -
    -

    -pos_def_limits

    -

    Not all correlation matrices are possible. For example, if variables A and B are correlated with r = 1.0, then the correlation between A and C can only be exactly equal to the correlation between B and C.

    -

    The function pos_def_limits() lets you know what the possible range of values is for the missing value in a correlation matrix with one missing value. The correlation values are entered just from the top right triangle of the matrix, with a single NA for the missing value.

    -
    lims <- pos_def_limits(.8, .2, NA)
    - - - - - - - - - -
    minmax
    -0.4270.747
    -

    For example, if rAB = 0.8 and rAC = 0.2, then -0.427 <= rBC <= 0.747.

    -

    If you enter a correlation matrix that contains impossible combinations, your limits will be NA.

    -
    lims <- pos_def_limits(.8, .2,  0,
    -                          -.5, NA,
    -                               .2)
    - - - - - - - - - -
    minmax
    NANA
    -

    If you have a full matrix and want to know if it is positive definite, you can use the following code:

    -
    c(.2, .3, .4, .2,
    -      .3, -.1, .2,
    -           .4, .5,
    -               .3) %>%
    -  cormat_from_triangle() %>%
    -  is_pos_def()
    -#> [1] TRUE
    -
    matrix(c(1, .3, -.9, .2,
    -        .3,  1,  .4, .5,
    -       -.9, .4,   1, .3,
    -        .2, .5,  .3,  1), 4) %>%
    -  is_pos_def()
    -#> [1] FALSE
    +
    +

    +Additional functions

    check_sim_stats

    @@ -626,7 +237,7 @@

    #> 4 150 Peta… 0.82 -0.37 0.96 1 1.2 0.76

    You can also group your data and change the digits to round. Display the table using knitr::kable() by setting usekable to TRUE (remember to set results='asis' in the chunk header).

    check_sim_stats(iris, 
    -                grp_by = "Species", 
    +                between = "Species", 
                     digits = 3, 
                     usekable = TRUE)
    @@ -777,70 +388,80 @@

    -
    +
    +

    +make_id

    +

    It is useful for IDs for random effects (e.g., subjects or stimuli) to be character strings (so you don’t accidentally include them as fixed effects) with the same length (so you can sort them in order like S01, S02, …, S10 rather than S1, S10, S2, …). This function returns a list of IDs that have the same string length and a specified prefix.

    +
    make_id(n = 10, prefix = "ITEM_")
    +#>  [1] "ITEM_01" "ITEM_02" "ITEM_03" "ITEM_04" "ITEM_05" "ITEM_06" "ITEM_07"
    +#>  [8] "ITEM_08" "ITEM_09" "ITEM_10"
    +

    You can also manually set the number of digits and set n to a range of integers.

    +
    make_id(n = 10:20, digits = 3)
    +#>  [1] "S010" "S011" "S012" "S013" "S014" "S015" "S016" "S017" "S018" "S019"
    +#> [11] "S020"
    +
    +

    -sim_design

    -

    Simulate data by specifying a design structure.

    -

    This function is under development and should be carefully checked!

    +long2wide
    between <- list("pet" = c("cat", "dog"))
     within <- list("time" = c("day", "night"))
    -mu <- data.frame(
    -  cat = c(10, 20),
    -  dog = c(15, 25),
    -  row.names = within$time
    -)
    -df <- sim_design(within, between, n = 100, cors = 0.5, mu = mu, sd = 5)
    +df_long <- sim_design(within, between, long = TRUE) + +df_wide <- long2wide(df_long, + within = "time", + between = "pet", + dv = "val", + id = "sub_id")
    +
    +
    +

    +pos_def_limits

    +

    Not all correlation matrices are possible. For example, if variables A and B are correlated with r = 1.0, then the correlation between A and C can only be exactly equal to the correlation between B and C.

    +

    The function pos_def_limits() lets you know what the possible range of values is for the missing value in a correlation matrix with one missing value. The correlation values are entered just from the top right triangle of the matrix, with a single NA for the missing value.

    +
    lims <- pos_def_limits(.8, .2, NA)
    - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + +
    petnvardaynightmeansdminmax
    cat100day1.000.5510.394.69
    cat100night0.551.0019.784.67
    dog100day1.000.4615.024.18
    dog100night0.461.0024.794.74
    -0.4270.747
    +

    For example, if rAB = 0.8 and rAC = 0.2, then -0.427 <= rBC <= 0.747.

    +

    If you enter a correlation matrix that contains impossible combinations, your limits will be NA.

    +
    lims <- pos_def_limits(.8, .2,  0,
    +                          -.5, NA,
    +                               .2)
    + + + + + + + + +
    minmax
    NANA
    -

    +
    +

    +is_pos_def()

    +

    If you have a full matrix and want to know if it is positive definite, you can use the following code:

    +
    c(.2, .3, .4, .2,
    +      .3, -.1, .2,
    +           .4, .5,
    +               .3) %>%
    +  cormat_from_triangle() %>%
    +  is_pos_def()
    +#> [1] TRUE
    +
    matrix(c(1, .3, -.9, .2,
    +        .3,  1,  .4, .5,
    +       -.9, .4,   1, .3,
    +        .2, .5,  .3,  1), 4) %>%
    +  is_pos_def()
    +#> [1] FALSE
    @@ -857,11 +478,17 @@

    License

    Developers

      -
    • Lisa DeBruine
      Author, maintainer
    • +
    • Lisa DeBruine
      Author, maintainer ORCID
    - +
    +

    Dev status

    +
      +
    • Lifecycle: experimental
    • +
    +
    + diff --git a/docs/index_files/figure-html/plot-iris-sim-1.png b/docs/index_files/figure-html/plot-iris-sim-1.png new file mode 100644 index 00000000..29468f8d Binary files /dev/null and b/docs/index_files/figure-html/plot-iris-sim-1.png differ diff --git a/docs/news/index.html b/docs/news/index.html index 1970a793..b8338901 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -60,7 +60,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -82,9 +82,6 @@ @@ -133,6 +130,14 @@

    • Bug fixes for sim_design() (failed when within or between factor number was 0)
    + +
    +

    +faux 0.0.0.9006

    +
      +
    • Changes to argument order and names (more consistent, but may break old scripts)
    • +
    • Updated vignettes
    • +
    @@ -142,6 +147,7 @@

    Contents

    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 59781696..ba63c6ac 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,8 +2,7 @@ pandoc: 1.19.2.1 pkgdown: 1.3.0 pkgdown_sha: ~ articles: - intro: intro.html rnorm_multi: rnorm_multi.html sim_design: sim_design.html - simdf: simdf.html + sim_df: sim_df.html diff --git a/docs/reference/check_design.html b/docs/reference/check_design.html index 9743ebb2..b6769873 100644 --- a/docs/reference/check_design.html +++ b/docs/reference/check_design.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ @@ -129,8 +126,8 @@

    Validate design

    -
    check_design(within = list(), between = list(), n = 100, cors = 0,
    -  mu = 0, sd = 1)
    +
    check_design(within = list(), between = list(), n = 100, mu = 0,
    +  sd = 1, r = 0)

    Arguments

    @@ -147,10 +144,6 @@

    Arg

    - - - - @@ -159,6 +152,10 @@

    Arg

    + + + +
    n

    the number of samples required

    cors

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    mu

    a vector giving the means of the variables (numeric vector of length 1 or vars)

    sd

    the standard deviations of the variables (numeric vector of length 1 or vars)

    r

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    Value

    @@ -170,7 +167,90 @@

    Examp
    within <- list(time = c("day", "night")) between <- list(pet = c("dog", "cat")) -design <- check_design(within, between)
    +check_design(within, between)
    #> $within +#> $within$time +#> [1] "day" "night" +#> +#> +#> $between +#> $between$pet +#> [1] "dog" "cat" +#> +#> +#> $within_factors +#> [1] "time" +#> +#> $between_factors +#> [1] "pet" +#> +#> $within_labels +#> $within_labels$time +#> day night +#> "day" "night" +#> +#> +#> $between_labels +#> $between_labels$pet +#> dog cat +#> "dog" "cat" +#> +#> +#> $cell_n +#> dog cat +#> day 100 100 +#> night 100 100 +#> +#> $cell_mu +#> dog cat +#> day 0 0 +#> night 0 0 +#> +#> $cell_sd +#> dog cat +#> day 1 1 +#> night 1 1 +#> +#> $cell_r +#> $cell_r$dog +#> [,1] [,2] +#> [1,] 1 0 +#> [2,] 0 1 +#> +#> $cell_r$cat +#> [,1] [,2] +#> [1,] 1 0 +#> [2,] 0 1 +#> +#> +#> $cells_w +#> [1] "day" "night" +#> +#> $cells_b +#> [1] "dog" "cat" +#> +#> $sub_id +#> [1] "S001" "S002" "S003" "S004" "S005" "S006" "S007" "S008" "S009" "S010" +#> [11] "S011" "S012" "S013" "S014" "S015" "S016" "S017" "S018" "S019" "S020" +#> [21] "S021" "S022" "S023" "S024" "S025" "S026" "S027" "S028" "S029" "S030" +#> [31] "S031" "S032" "S033" "S034" "S035" "S036" "S037" "S038" "S039" "S040" +#> [41] "S041" "S042" "S043" "S044" "S045" "S046" "S047" "S048" "S049" "S050" +#> [51] "S051" "S052" "S053" "S054" "S055" "S056" "S057" "S058" "S059" "S060" +#> [61] "S061" "S062" "S063" "S064" "S065" "S066" "S067" "S068" "S069" "S070" +#> [71] "S071" "S072" "S073" "S074" "S075" "S076" "S077" "S078" "S079" "S080" +#> [81] "S081" "S082" "S083" "S084" "S085" "S086" "S087" "S088" "S089" "S090" +#> [91] "S091" "S092" "S093" "S094" "S095" "S096" "S097" "S098" "S099" "S100" +#> [101] "S101" "S102" "S103" "S104" "S105" "S106" "S107" "S108" "S109" "S110" +#> [111] "S111" "S112" "S113" "S114" "S115" "S116" "S117" "S118" "S119" "S120" +#> [121] "S121" "S122" "S123" "S124" "S125" "S126" "S127" "S128" "S129" "S130" +#> [131] "S131" "S132" "S133" "S134" "S135" "S136" "S137" "S138" "S139" "S140" +#> [141] "S141" "S142" "S143" "S144" "S145" "S146" "S147" "S148" "S149" "S150" +#> [151] "S151" "S152" "S153" "S154" "S155" "S156" 
"S157" "S158" "S159" "S160" +#> [161] "S161" "S162" "S163" "S164" "S165" "S166" "S167" "S168" "S169" "S170" +#> [171] "S171" "S172" "S173" "S174" "S175" "S176" "S177" "S178" "S179" "S180" +#> [181] "S181" "S182" "S183" "S184" "S185" "S186" "S187" "S188" "S189" "S190" +#> [191] "S191" "S192" "S193" "S194" "S195" "S196" "S197" "S198" "S199" "S200" +#>
    +
    @@ -85,9 +85,6 @@ @@ -129,18 +126,31 @@

    Check table stats

    -
    check_sim_stats(dat, grp_by = NULL, digits = 2, usekable = FALSE)
    +
    check_sim_stats(.data, between = c(), within = c(), dv = c(),
    +  id = c(), digits = 2, usekable = FALSE)

    Arguments

    - - + + + + + + + + + + + + + + - - + + @@ -154,26 +164,26 @@

    Arg

    Value

    -

    tibble or kable

    +

    a tbl or kable

    Examples

    -
    check_sim_stats(iris, "Species")
    #> # A tibble: 12 x 9 +
    check_sim_stats(iris, "Species")
    #> # A tibble: 12 x 9 #> Species n var Sepal.Length Sepal.Width Petal.Length Petal.Width mean -#> <fct> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 setosa 50 Sepa… 1 0.74 0.27 0.28 5.01 -#> 2 setosa 50 Sepa… 0.74 1 0.18 0.23 3.43 -#> 3 setosa 50 Peta… 0.27 0.18 1 0.33 1.46 -#> 4 setosa 50 Peta… 0.28 0.23 0.33 1 0.25 -#> 5 versic… 50 Sepa… 1 0.53 0.75 0.55 5.94 -#> 6 versic… 50 Sepa… 0.53 1 0.56 0.66 2.77 -#> 7 versic… 50 Peta… 0.75 0.56 1 0.79 4.26 -#> 8 versic… 50 Peta… 0.55 0.66 0.79 1 1.33 -#> 9 virgin… 50 Sepa… 1 0.46 0.86 0.28 6.59 -#> 10 virgin… 50 Sepa… 0.46 1 0.4 0.54 2.97 -#> 11 virgin… 50 Peta… 0.86 0.4 1 0.32 5.55 -#> 12 virgin… 50 Peta… 0.28 0.54 0.32 1 2.03 -#> # … with 1 more variable: sd <dbl>
    +#>
    <fct> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 setosa 50 Sepa… 1 0.74 0.27 0.28 5.01 +#> 2 setosa 50 Sepa… 0.74 1 0.18 0.23 3.43 +#> 3 setosa 50 Peta… 0.27 0.18 1 0.33 1.46 +#> 4 setosa 50 Peta… 0.28 0.23 0.33 1 0.25 +#> 5 versic… 50 Sepa… 1 0.53 0.75 0.55 5.94 +#> 6 versic… 50 Sepa… 0.53 1 0.56 0.66 2.77 +#> 7 versic… 50 Peta… 0.75 0.56 1 0.79 4.26 +#> 8 versic… 50 Peta… 0.55 0.66 0.79 1 1.33 +#> 9 virgin… 50 Sepa… 1 0.46 0.86 0.28 6.59 +#> 10 virgin… 50 Sepa… 0.46 1 0.4 0.54 2.97 +#> 11 virgin… 50 Peta… 0.86 0.4 1 0.32 5.55 +#> 12 virgin… 50 Peta… 0.28 0.54 0.32 1 2.03 +#> # … with 1 more variable: sd <dbl>
    @@ -85,9 +85,6 @@ @@ -129,7 +126,7 @@

    Convert parameter

    -
    convert_param(param, cells_b, cells_w, type = "this parameter")
    +
    convert_param(param, cells_w, cells_b, type = "this parameter")

    Arguments

    dat

    the existing dataframe

    .data

    the existing tbl

    between

    a vector of column names for between-subject factors

    within

    a vector of column names for within-subject factors (if data is long)

    dv

    the column name of the dv (if data is long)

    grp_by

    an optional list of column names to group by

    id

    the column name(s) of the subject ID (if data is long)

    digits
    @@ -138,14 +135,14 @@

    Arg

    - - - - + + + + diff --git a/docs/reference/cormat.html b/docs/reference/cormat.html index c6b0a676..6abdb450 100644 --- a/docs/reference/cormat.html +++ b/docs/reference/cormat.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/cormat_from_triangle.html b/docs/reference/cormat_from_triangle.html index f2797f34..d7d2db61 100644 --- a/docs/reference/cormat_from_triangle.html +++ b/docs/reference/cormat_from_triangle.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/faceratings.html b/docs/reference/faceratings.html index 481c039d..8ed382bd 100644 --- a/docs/reference/faceratings.html +++ b/docs/reference/faceratings.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/fix_name_labels.html b/docs/reference/fix_name_labels.html index fec99c75..a6041bfe 100644 --- a/docs/reference/fix_name_labels.html +++ b/docs/reference/fix_name_labels.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/get_design_long.html b/docs/reference/get_design_long.html new file mode 100644 index 00000000..5518b325 --- /dev/null +++ b/docs/reference/get_design_long.html @@ -0,0 +1,186 @@ + + + + + + + + +Get design from long data — get_design_long • faux + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Makes a best guess at the design of a long-format data frame. +Finds all columns that contain a single value per unit of analysis (between factors), +all columns that contain the same values per unit of analysis (within factors), and +all columns that differ over units of analysis (dv, continuous factors)

    + +
    + +
    get_design_long(.data, id = "sub_id", dv = "val")
    + +

    Arguments

    +
    param

    the parameter (mu, sd, or n)

    cells_b

    a list of between-subject cell combinations

    cells_w

    a list of within-subject cells combinations

    cells_b

    a list of between-subject cell combinations

    type

    the name of the parameter (for error messages)

    + + + + + + + + + + + + + +
    .data

    the data frame (in long format)

    id

    the column name(s) that identify a unit of analysis

    dv

    the column name that identifies the DV

    + +

    Value

    + +

    the data frame in long format

    + + + + + + +
    + + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 2e76c849..bedc5427 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -60,7 +60,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -82,9 +82,6 @@ @@ -147,12 +144,6 @@

    convert_param()

    - -

    Convert parameter

    - -

    cormat()

    @@ -171,12 +162,30 @@

    get_design_long()

    + +

    Get design from long data

    + +

    is_pos_def()

    Check a Matrix is Positive Definite

    + +

    long2wide()

    + +

    Long to wide format

    + + + +

    make_id()

    + +

    Make ID

    + +

    pos_def_limits()

    @@ -208,15 +217,21 @@

    simdf()

    +

    sim_df()

    Simulate an existing dataframe

    -

    simdf_mixed()

    +

    sim_mixed_df()

    Generate a sample with random intercepts for subjects and items

    + + + +

    wide2long()

    + +

    Wide to long format

    diff --git a/docs/reference/is_pos_def.html b/docs/reference/is_pos_def.html index 375742c3..cc7e1e4c 100644 --- a/docs/reference/is_pos_def.html +++ b/docs/reference/is_pos_def.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/long2wide.html b/docs/reference/long2wide.html new file mode 100644 index 00000000..654b00f5 --- /dev/null +++ b/docs/reference/long2wide.html @@ -0,0 +1,208 @@ + + + + + + + + +Long to wide format — long2wide • faux + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Converts data from long format to wide

    + +
    + +
    long2wide(.data, within = c(), between = c(), dv = "val",
    +  id = "sub_id")
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    .data

    the tbl in long format

    within

    the names of the within column(s)

    between

    the names of between column(s) (optional)

    dv

    the name of the DV (value) column

    id

    the names of the column(s) for grouping observations

    + +

    Value

    + +

    a tbl in wide format

    + + +

    Examples

    +
    df_long <- sim_design(2, 2, long = TRUE) +long2wide(df_long, "A", "B", "val", "sub_id")
    #> # A tibble: 200 x 4 +#> sub_id B A1 A2 +#> <chr> <fct> <dbl> <dbl> +#> 1 S001 B1 0.387 -1.40 +#> 2 S002 B1 0.785 0.255 +#> 3 S003 B1 1.06 -2.44 +#> 4 S004 B1 0.796 -0.00557 +#> 5 S005 B1 1.76 0.622 +#> 6 S006 B1 0.691 1.15 +#> 7 S007 B1 0.559 -1.82 +#> 8 S008 B1 0.537 -0.247 +#> 9 S009 B1 -0.227 -0.244 +#> 10 S010 B1 -0.978 -0.283 +#> # … with 190 more rows
    +
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    +
    + + + + + + diff --git a/docs/reference/make_id.html b/docs/reference/make_id.html new file mode 100644 index 00000000..999c632b --- /dev/null +++ b/docs/reference/make_id.html @@ -0,0 +1,191 @@ + + + + + + + + +Make ID — make_id • faux + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Make IDs with fixed length and a letter prefix for random effects (e.g., S001, S002, ..., S100).

    + +
    + +
    make_id(n = 100, prefix = "S", digits = 0)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    n

    the number of IDs to generate (or a vector of numbers)

    prefix

    the letter prefix to the number

    digits

    the number of digits to use for the numeric part. Only used if this is larger than the number of digits in n.

    + +

    Value

    + +

    a vector of IDs

    + + +

    Examples

    +
    +make_id(20, "SUBJECT_")
    #> [1] "SUBJECT_01" "SUBJECT_02" "SUBJECT_03" "SUBJECT_04" "SUBJECT_05" +#> [6] "SUBJECT_06" "SUBJECT_07" "SUBJECT_08" "SUBJECT_09" "SUBJECT_10" +#> [11] "SUBJECT_11" "SUBJECT_12" "SUBJECT_13" "SUBJECT_14" "SUBJECT_15" +#> [16] "SUBJECT_16" "SUBJECT_17" "SUBJECT_18" "SUBJECT_19" "SUBJECT_20"
    make_id(10:30, digits = 3)
    #> [1] "S010" "S011" "S012" "S013" "S014" "S015" "S016" "S017" "S018" "S019" +#> [11] "S020" "S021" "S022" "S023" "S024" "S025" "S026" "S027" "S028" "S029" +#> [21] "S030"
    +
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    +
    + + + + + + diff --git a/docs/reference/pipe.html b/docs/reference/pipe.html index 9592e06d..3fa0d792 100644 --- a/docs/reference/pipe.html +++ b/docs/reference/pipe.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/pos_def_limits.html b/docs/reference/pos_def_limits.html index 2c4957db..628cd5a6 100644 --- a/docs/reference/pos_def_limits.html +++ b/docs/reference/pos_def_limits.html @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ diff --git a/docs/reference/rnorm_multi.html b/docs/reference/rnorm_multi.html index 4b74b783..da60ab24 100644 --- a/docs/reference/rnorm_multi.html +++ b/docs/reference/rnorm_multi.html @@ -32,7 +32,7 @@ - + @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ @@ -125,12 +122,12 @@

    Multiple Normally Distributed Vectors

    -

    rnorm_multi makes multiple normally distributed vectors with specified relationships

    +

    rnorm_multi() makes multiple normally distributed vectors with specified relationships.

    -
    rnorm_multi(n, vars = 3, cors = 0, mu = 0, sd = 1,
    -  varnames = NULL, empirical = FALSE, as.matrix = FALSE)
    +
    rnorm_multi(n, vars = 3, mu = 0, sd = 1, r = 0, varnames = NULL,
    +  empirical = FALSE, as.matrix = FALSE, cors = NULL)

    Arguments

    @@ -143,10 +140,6 @@

    Arg

    - - - - @@ -155,227 +148,236 @@

    Arg

    + + + + - + - + + + + +
    vars

    the number of variables to return

    cors

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    mu

    a vector giving the means of the variables (numeric vector of length 1 or vars)

    sd

    the standard deviations of the variables (numeric vector of length 1 or vars)

    r

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    varnames

    optional names for the variables (string vector of length vars) defaults if cors is a matrix with column names

    optional names for the variables (string vector of length vars) defaults if r is a matrix with column names

    empirical

    logical. If true, mu, sd and cors specify the empirical not population mean, sd and covariance

    logical. If true, mu, sd and r specify the empirical not population mean, sd and covariance

    as.matrix

    logical. If true, returns a matrix

    cors

    (deprecated; use r)

    Value

    -

    dataframe of vars vectors

    +

    a tbl of vars vectors

    Examples

    -
    rnorm_multi(100, 3, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C"))
    #> A B C -#> 1 0.750792572 1.424873995 1.01619162 -#> 2 -1.042243673 -0.275816637 0.52639710 -#> 3 0.878491981 2.434684102 2.13943193 -#> 4 -0.500509231 0.536184495 -0.07283162 -#> 5 -1.099839859 1.395668514 -1.63782640 -#> 6 -1.268203415 -0.516346650 -0.90254750 -#> 7 1.062341096 2.085361364 1.05389880 -#> 8 -0.072670487 0.713209683 -0.06669068 -#> 9 0.799007438 0.832138301 -0.84904571 -#> 10 0.274439429 -1.305844900 1.50703213 -#> 11 0.483758950 0.961135166 -0.09054153 -#> 12 -1.335560745 0.410231530 -0.60767290 -#> 13 -1.365394967 -1.989742102 -1.39808947 -#> 14 1.058685530 1.942674898 0.79418602 -#> 15 -0.059286958 -0.896242839 -0.20963410 -#> 16 2.732876265 0.225408869 1.48006414 -#> 17 0.520040403 -0.045099167 0.69894364 -#> 18 -1.261009283 0.795477058 0.38156161 -#> 19 0.152387337 -0.001507871 -1.21894494 -#> 20 1.153452339 -0.243123670 1.18059995 -#> 21 1.221036402 -1.329257135 -0.72696920 -#> 22 0.097150590 -0.488381489 -0.38403747 -#> 23 1.847044170 0.215193119 1.03746265 -#> 24 -0.213594069 -0.833126883 -0.60370775 -#> 25 -0.944882567 -1.108280341 -2.13611782 -#> 26 -0.203627401 -0.399418526 0.70486850 -#> 27 0.971899030 0.251539268 0.93124695 -#> 28 -0.067710098 0.330687769 -0.20521675 -#> 29 0.008234813 0.643985743 1.10851032 -#> 30 1.491364279 1.606960145 0.51407350 -#> 31 -1.491314723 -0.173569149 -0.59828848 -#> 32 0.788400822 -0.100235777 -0.87821239 -#> 33 -0.275325098 -0.506920585 0.16149729 -#> 34 -1.646030985 0.066253801 -2.08692166 -#> 35 -0.763940091 0.400133152 0.01055207 -#> 36 0.559861711 0.094147624 -0.24183194 -#> 37 0.851096610 2.217705492 1.27999821 -#> 38 -0.174916339 0.113335133 0.60261481 -#> 39 0.579792609 -0.678784282 0.78461866 -#> 40 -0.270889155 -0.210612089 -1.77436354 -#> 41 0.045616422 0.372504963 -0.50383699 -#> 42 -0.469471970 0.436848174 1.27704375 -#> 43 1.116862858 -1.388382465 0.45742838 -#> 44 0.435540978 -0.674785328 0.76644032 -#> 45 0.733199928 -0.330524626 -1.17879303 -#> 46 -1.202678340 -3.307972897 
-1.77046080 -#> 47 -0.829440876 0.569422843 0.04572102 -#> 48 0.001784459 -0.684587716 -0.57477015 -#> 49 -1.255359394 0.562293801 0.23466705 -#> 50 2.329409361 0.563363357 1.59377323 -#> 51 -0.074278634 -0.571594535 -1.19274011 -#> 52 0.364196656 0.305387342 -0.05428873 -#> 53 -0.399415335 1.216792632 -0.33993885 -#> 54 -0.237192644 0.568221498 -0.35410471 -#> 55 0.395099319 -1.135493621 0.63054444 -#> 56 0.309387409 -0.471324983 -0.94209682 -#> 57 1.467751541 1.584594817 2.09891298 -#> 58 -1.921116085 -1.649349291 -2.52231252 -#> 59 0.936534595 0.074720566 -0.05360963 -#> 60 0.095504143 -1.141376090 0.49330431 -#> 61 -2.100395036 0.162246518 -0.70544430 -#> 62 1.437719944 0.530497597 -0.22212529 -#> 63 -0.956070367 -0.668411888 -0.93985501 -#> 64 0.399297618 0.122294316 0.08451302 -#> 65 -0.286806047 2.194503989 0.68953463 -#> 66 0.569197516 1.947087832 -0.16302798 -#> 67 -1.949290727 -0.897080260 0.12529717 -#> 68 0.394865364 0.065765899 -0.62937094 -#> 69 -0.503900977 -0.175240568 -0.45048740 -#> 70 1.439840970 0.910978267 1.54359768 -#> 71 0.887303297 1.141197444 1.30315927 -#> 72 -0.380841256 1.126287256 -1.27061154 -#> 73 0.046357782 -1.651026981 -1.30496243 -#> 74 0.501969183 -0.740051843 -0.21905098 -#> 75 -0.327677152 -1.412160664 -1.18738515 -#> 76 -0.041519929 -1.656652359 0.41972821 -#> 77 -1.237010922 -1.007396285 0.60385664 -#> 78 -0.999395609 0.507528583 0.54872639 -#> 79 -0.508546869 -0.250979903 0.26216982 -#> 80 1.706916012 -1.013138961 -0.24417856 -#> 81 -2.250638781 -1.264743365 -1.06547039 -#> 82 -1.465750791 -0.014902364 -1.49434831 -#> 83 -1.819797525 0.131454869 -0.23351058 -#> 84 0.644464430 -1.365010318 -0.43015447 -#> 85 0.673776094 -0.023604894 0.61492934 -#> 86 0.424058255 -2.337033827 -0.53427314 -#> 87 1.838236640 2.403650450 1.78253998 -#> 88 1.880351381 -1.351154782 0.05113393 -#> 89 0.143202959 -1.430192202 0.26174046 -#> 90 1.155769129 -0.186903327 0.04880420 -#> 91 0.559813595 -0.862499677 -0.53711912 -#> 92 -0.936341076 
-0.503527111 -0.99627787 -#> 93 -1.049096344 0.032194348 -1.34962850 -#> 94 -0.497978396 0.162235981 0.31899526 -#> 95 0.565120969 -1.919780662 0.25948394 -#> 96 -0.664895124 -0.766542929 -2.21344800 -#> 97 0.829280403 0.592403315 -0.43854944 -#> 98 -0.017677656 -1.656619109 -0.45664659 -#> 99 -0.131923116 -1.896881557 -0.91048949 -#> 100 0.146219686 -0.896684910 1.27558673
    rnorm_multi(100, 3, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C"))
    #> A B C -#> 1 1.39081618 1.2987554842 -0.54274184 -#> 2 0.12474394 -1.4341962212 -0.83385076 -#> 3 -1.31234622 -0.1773139308 1.11548187 -#> 4 0.13764915 1.4386412100 1.18934079 -#> 5 -1.55271429 -0.7649622047 -0.14994138 -#> 6 -0.69104513 -1.1444681510 -0.39532881 -#> 7 0.28331723 0.6930376611 0.78522733 -#> 8 0.62698282 2.6282635812 2.09300595 -#> 9 -1.35745434 0.4066573669 2.08806190 -#> 10 -0.06698887 -2.4653818490 -1.20034841 -#> 11 0.02483736 -0.0410126675 -0.86813242 -#> 12 0.79142685 -1.8084817841 -0.37123506 -#> 13 -0.88409828 -0.1531384713 0.49194054 -#> 14 0.60358179 -1.3450087188 -1.86485444 -#> 15 0.74467121 1.2994217211 0.32121665 -#> 16 0.26231671 0.7238420836 -0.21253993 -#> 17 -0.08841355 0.8852067209 1.38717738 -#> 18 -1.18692758 -1.5262910813 -0.22392502 -#> 19 1.02473345 0.3596218704 -1.36911133 -#> 20 0.29173164 0.0537979231 0.48062427 -#> 21 -0.10151671 1.0013139089 0.41741153 -#> 22 -0.38409912 0.1086193588 0.27613213 -#> 23 -0.17473214 1.3226328277 1.56677087 -#> 24 -0.68841302 -0.8568191519 0.22462907 -#> 25 -3.40268201 -1.1503624457 2.19509962 -#> 26 1.27476703 -0.1608727574 -1.60243175 -#> 27 -0.76774044 -0.3136043209 0.40748355 -#> 28 -2.93463796 0.0002708164 0.54330948 -#> 29 -0.36309562 0.4812979048 0.08769210 -#> 30 0.11235349 1.2436686101 0.40667721 -#> 31 -0.42112919 1.1241955619 0.82149954 -#> 32 0.65132497 -1.9692499386 -1.72175518 -#> 33 -0.39001734 1.2671731043 1.01569959 -#> 34 1.05444567 0.7305534127 -0.08594560 -#> 35 -0.65195625 -0.5195396901 0.41514849 -#> 36 -0.95833910 0.2003833523 1.41935582 -#> 37 -0.09312194 -1.7133185727 -0.91550462 -#> 38 0.15254009 -0.8956400361 -1.25958623 -#> 39 0.07535271 0.1033439817 0.16790440 -#> 40 -1.61236803 -1.1266807862 1.13697372 -#> 41 -0.53493316 -1.6457716779 0.28787366 -#> 42 -0.69921708 1.7978716255 1.97787573 -#> 43 -0.58102874 -1.3412768754 0.28024767 -#> 44 0.39080373 -0.9614140080 -1.51340506 -#> 45 0.98098214 -0.2651307385 -1.15693664 -#> 46 -1.41159592 0.9114650277 
1.44179604 -#> 47 -1.05225142 -0.9418931935 -0.20252229 -#> 48 -0.40068116 -1.1950503292 -0.24799347 -#> 49 1.38669506 0.9589220188 0.06210057 -#> 50 -0.35464447 0.8920651923 0.55778884 -#> 51 1.53904431 1.2176546707 -0.77686030 -#> 52 0.16578814 0.4331779127 -0.40893746 -#> 53 -0.60914155 -0.8140949898 1.01178545 -#> 54 -2.16814062 -0.2863029111 1.76151931 -#> 55 -0.85887053 0.7286470557 1.46281376 -#> 56 -2.69493701 -0.5352409200 1.22842833 -#> 57 0.25329846 -0.2477709459 -0.33847285 -#> 58 -1.42870286 0.8775450724 1.65829026 -#> 59 1.59072274 0.1852101502 -0.06485567 -#> 60 0.19047954 -0.5331432083 -1.01246796 -#> 61 1.12633124 0.4510249139 -0.16385936 -#> 62 -0.86502391 0.4564717361 0.97700377 -#> 63 0.41198377 2.1574189238 0.29625434 -#> 64 -1.33266567 0.1334556024 2.13446398 -#> 65 -1.26258869 -0.1687697156 -0.84856895 -#> 66 -1.02980626 0.6029517902 0.65050954 -#> 67 0.38682167 -1.3061125155 -1.84341380 -#> 68 0.23417755 -0.4236220335 -0.99612575 -#> 69 0.38936213 1.4383517669 1.20765999 -#> 70 0.52613652 0.4669543053 -0.25615525 -#> 71 -0.78625593 -0.1890983901 -0.06583813 -#> 72 -0.52607091 -0.7252667231 -1.05297080 -#> 73 -0.28890582 -1.0441594449 -1.15239430 -#> 74 -1.25168046 -2.1088896749 -1.11497680 -#> 75 -0.67847213 -1.4529646435 -0.09410188 -#> 76 -0.99112777 0.2271702323 1.19094153 -#> 77 -0.27328045 -0.6278109929 0.89942994 -#> 78 -0.05788188 -0.2011886425 -0.70444974 -#> 79 0.44426016 0.3086371578 -0.14094071 -#> 80 -1.86391621 -0.2017716705 1.04344653 -#> 81 -0.52027246 0.1144370542 0.17047033 -#> 82 0.68790768 0.3227648343 -0.35982484 -#> 83 0.06379928 1.0963218557 -0.79995582 -#> 84 0.71240232 0.6765428974 0.08052656 -#> 85 1.00680982 1.3004044786 0.46945908 -#> 86 -0.19307208 -0.8284714031 0.42528039 -#> 87 0.58798451 0.2443710099 0.22379468 -#> 88 0.29882724 0.8581150980 1.22292625 -#> 89 1.84074363 1.0970181676 -0.69553217 -#> 90 -1.91299092 -1.2784569227 0.17807915 -#> 91 0.64255687 -0.5425659342 0.03429620 -#> 92 -0.83135155 0.4205869612 
0.80329005 -#> 93 -1.44298017 0.7499570038 1.75007779 -#> 94 -0.80721771 -1.3782307639 -0.83537525 -#> 95 -0.56849692 -0.2160438395 -0.20213552 -#> 96 -1.43237796 1.0144647169 1.92039006 -#> 97 -0.17925759 -1.5018571840 -1.44550197 -#> 98 -0.35894894 0.9200034445 1.16485148 -#> 99 0.03204885 -0.5520698956 0.01677568 -#> 100 0.83799420 1.2748026410 -0.90440295
    +
    rnorm_multi(100, 3, 0, 1, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C"))
    #> A B C +#> 1 -1.711924939 -1.011599824 -1.32018668 +#> 2 0.887134792 0.043794457 0.99462051 +#> 3 0.748427556 0.410991509 1.01598343 +#> 4 -0.988547303 -1.612324193 0.10827445 +#> 5 0.250415810 1.967522573 1.12127447 +#> 6 1.012473136 1.329047164 0.46137529 +#> 7 0.290391321 -0.492076253 -1.07257786 +#> 8 -0.999779485 -2.143044059 -1.61909580 +#> 9 1.296369128 0.223297040 0.05172819 +#> 10 1.844675802 0.525323939 1.43792983 +#> 11 -0.594332509 1.134701125 -0.51513681 +#> 12 2.892055704 -0.024653246 -0.87388294 +#> 13 0.674912545 1.145791522 -0.16442269 +#> 14 0.114929596 0.751160451 0.22516804 +#> 15 -0.798672240 -0.534844472 -1.59558698 +#> 16 -1.031496700 0.049687627 -0.54144674 +#> 17 -0.004310488 -1.375989084 0.16455645 +#> 18 1.415566785 1.598106413 1.06560667 +#> 19 -1.642731803 -0.241490494 -0.33724621 +#> 20 0.910502722 -0.191788473 -0.98662517 +#> 21 -1.175150978 -0.479527388 0.14625695 +#> 22 0.305901881 0.761645013 -0.54205693 +#> 23 -0.367942472 -1.256019905 -0.12063644 +#> 24 0.892308000 0.193255995 1.16876289 +#> 25 2.538452134 2.240282666 2.04947504 +#> 26 -0.695068465 0.222697327 -1.11980217 +#> 27 0.512021718 0.508770162 0.57718656 +#> 28 -0.932274424 2.643562030 2.24879422 +#> 29 -0.585604074 0.622505249 -0.24917585 +#> 30 -1.355941412 -0.590585765 -0.21519908 +#> 31 -0.733846362 0.082328508 -0.42830930 +#> 32 1.264872249 0.715577491 0.12590367 +#> 33 -0.518786456 0.201122493 -0.92638734 +#> 34 -0.637506919 -1.122500780 -0.86513473 +#> 35 0.632266473 -0.010632207 1.05787745 +#> 36 1.104769095 0.406299810 -0.21379747 +#> 37 1.472185167 0.971669943 0.38215346 +#> 38 0.208642678 1.232615573 -0.26677942 +#> 39 0.194660642 -0.021702220 -0.36527459 +#> 40 1.781764439 0.408108369 1.86009067 +#> 41 2.042761937 -0.215878860 1.47377857 +#> 42 -0.490815530 -0.425984391 -0.68877135 +#> 43 1.655781000 -0.033659668 1.27356903 +#> 44 -0.025382344 0.905039301 -0.02548762 +#> 45 -0.665268785 -0.563017316 0.06761990 +#> 46 -0.276302646 0.608159422 0.33566043 
+#> 47 0.484250987 1.126346311 1.25406657 +#> 48 1.282907646 0.720836758 0.47160057 +#> 49 -0.277030402 -1.021860386 -1.96611414 +#> 50 -0.819145743 -0.037232249 -0.04063296 +#> 51 -1.564663610 -0.636464908 -1.85042764 +#> 52 -0.898058218 0.284764624 -0.34268128 +#> 53 1.805225599 -0.408926161 0.83297444 +#> 54 1.365198223 1.113118376 1.19018362 +#> 55 0.216231182 -0.115908105 0.10565315 +#> 56 0.641593299 1.738105080 2.21714184 +#> 57 0.213674643 0.168441446 -0.31953324 +#> 58 0.253013272 0.862077906 -0.22789514 +#> 59 0.287070174 -1.951729555 -0.83911657 +#> 60 -0.481794003 0.239976721 0.61024139 +#> 61 0.025132801 -0.628056375 -1.53689844 +#> 62 -0.167673231 -0.101515541 0.75379782 +#> 63 -2.425362627 -0.498203692 -1.06272155 +#> 64 1.350643220 -0.486047718 0.95719724 +#> 65 -1.242626738 2.175710297 0.92752367 +#> 66 -0.338836433 1.113640640 -0.15518463 +#> 67 -0.026841819 0.966138768 0.37921477 +#> 68 -0.453608406 0.413598279 0.23549706 +#> 69 -0.493723033 -1.334332953 -0.84630857 +#> 70 -1.221646818 -1.238780109 0.72514498 +#> 71 0.321048652 1.743009432 -0.49620029 +#> 72 -0.527149031 1.184322665 1.00890307 +#> 73 0.070016525 1.224225492 0.62085618 +#> 74 1.134493406 2.219461421 1.58638671 +#> 75 1.504749960 0.614156332 1.09643083 +#> 76 0.446530671 0.052052719 0.61180283 +#> 77 1.818863849 -0.302817368 0.04494236 +#> 78 -0.791160513 0.179908256 0.79171348 +#> 79 -0.300914823 -0.253138072 -0.53330713 +#> 80 0.926355041 1.873512137 0.35236855 +#> 81 0.144573957 1.043953709 -0.49422615 +#> 82 -0.027810676 0.325938318 -1.58919459 +#> 83 -2.100930284 1.066836105 -0.83668769 +#> 84 -0.444333571 -0.715766312 -0.85121894 +#> 85 -0.968875193 -1.687613536 -0.79915130 +#> 86 0.959440062 -1.088246737 1.55349882 +#> 87 -0.345251762 -1.686853560 0.65311349 +#> 88 0.572082195 -0.813048594 -1.24514160 +#> 89 -1.365521552 -1.373149134 -1.59569125 +#> 90 1.176097394 2.119269863 1.42040520 +#> 91 1.221033387 -0.577310879 -0.56619463 +#> 92 0.108998277 0.648942202 -0.11418245 
+#> 93 0.101151686 0.111384203 0.73298987 +#> 94 0.586259342 1.453725946 1.13935521 +#> 95 0.153452555 1.348353614 -0.27134221 +#> 96 0.235523130 0.480340620 -0.05456858 +#> 97 0.785445485 1.876836130 -0.04612489 +#> 98 -0.198978331 -0.525346365 -0.12181418 +#> 99 0.629380511 -0.357590811 0.51006913 +#> 100 -2.276017019 -0.001639159 -1.01436632
    rnorm_multi(100, 3, 0, 1, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C"))
    #> A B C +#> 1 0.98053619 1.397345080 1.24702308 +#> 2 0.28494917 -0.929721452 0.11231993 +#> 3 -0.48926432 -0.317158657 0.91387543 +#> 4 0.62746163 0.690257228 -0.04676857 +#> 5 0.52497027 -0.006770402 -0.54982277 +#> 6 -0.08113113 -1.203695112 -0.38591526 +#> 7 0.10950230 -0.448304595 -0.72749899 +#> 8 -0.98054763 -1.413085775 0.44500300 +#> 9 -0.22717869 0.588496717 1.47955055 +#> 10 -0.08167748 0.129810359 0.62297867 +#> 11 -1.06794579 1.622348860 2.17266535 +#> 12 1.23713517 1.254934760 0.21802500 +#> 13 2.34268548 -0.127564780 -1.84686336 +#> 14 -0.79906111 -0.670100344 -0.78681222 +#> 15 1.14762821 0.425896059 -0.19353105 +#> 16 -0.68792927 0.746387469 1.17342419 +#> 17 -0.42239464 -2.279769415 -1.45252677 +#> 18 -0.71983873 -0.782993158 -0.48339888 +#> 19 -0.27644209 -0.078365574 -0.15548499 +#> 20 0.65702277 -2.167247198 -2.28768427 +#> 21 1.36346504 2.807540375 0.61058213 +#> 22 2.26453757 -0.107450280 -1.03851429 +#> 23 1.07057295 1.036447960 0.77105891 +#> 24 -0.75535093 0.189251094 1.72491550 +#> 25 -0.68100214 -0.665149580 -0.04357655 +#> 26 -0.47017350 -0.314580812 -0.27934568 +#> 27 0.73476477 1.147051694 0.19216533 +#> 28 -0.09149070 0.876867711 -0.08339957 +#> 29 -2.05839553 -1.187120062 -0.84369069 +#> 30 -0.62779679 0.680592120 0.13823150 +#> 31 0.82896856 0.492073064 -0.10702775 +#> 32 1.12230254 0.029101542 -0.69550195 +#> 33 1.41109327 -0.547954608 -1.55760838 +#> 34 -1.28998785 -0.644308963 0.45818445 +#> 35 -0.74524292 0.423205419 2.23457494 +#> 36 0.18163205 0.475178614 0.81660170 +#> 37 -1.29214617 0.507173875 0.60586872 +#> 38 -0.52453660 -0.422831247 0.14219949 +#> 39 2.71082498 1.025301778 -0.77900169 +#> 40 0.67234061 -0.927892656 -0.94160161 +#> 41 -0.22932465 -0.452949466 -0.75393801 +#> 42 0.79662820 0.012013809 -0.66456985 +#> 43 -1.48598921 -1.485862845 -0.34075299 +#> 44 -2.73794234 -0.333565773 1.35801078 +#> 45 1.04730267 -0.239862856 -0.94911295 +#> 46 0.79570220 0.409234356 -0.02512850 +#> 47 -0.29797799 -0.960151155 
0.32858669 +#> 48 1.32398457 0.086940658 -0.84502893 +#> 49 0.70645034 0.763352966 0.66553042 +#> 50 -1.19011584 -0.898273386 0.01265418 +#> 51 0.17313328 0.375553597 -0.01675357 +#> 52 1.04061188 -0.219130525 -0.77337525 +#> 53 -0.43883826 -0.042922039 1.27360295 +#> 54 0.67987637 -1.058120498 -0.27502560 +#> 55 -0.51468288 0.062507127 0.76999444 +#> 56 -0.80241295 -0.945210621 0.34889021 +#> 57 -0.47060134 -1.268959331 0.70686003 +#> 58 2.49959376 0.682354301 -1.46377249 +#> 59 -0.76711289 -0.641906992 0.26363487 +#> 60 0.84037060 -0.130252667 -0.61136080 +#> 61 -0.47759972 0.405378724 0.06637905 +#> 62 -1.35795459 0.303024258 1.33414293 +#> 63 -0.86120089 -1.445283701 0.98588190 +#> 64 0.65405904 -1.262137582 -1.45782179 +#> 65 -1.52487511 -1.185720620 -0.29528001 +#> 66 -0.60310623 0.543202595 0.16109147 +#> 67 -1.34640940 0.111464903 0.03001872 +#> 68 -1.20726090 -0.211677096 0.84891723 +#> 69 -0.07838011 0.191885581 0.98564804 +#> 70 1.13262307 0.785543693 -0.06059885 +#> 71 1.36490199 -0.252005899 -2.00968693 +#> 72 1.18092139 -0.345183489 -0.80804496 +#> 73 -1.50057211 -0.143484529 0.56351319 +#> 74 -0.67605220 -0.161655297 0.70612018 +#> 75 -0.39630979 -0.711680593 -0.01193046 +#> 76 -1.06729240 0.790110596 0.42953674 +#> 77 0.25135374 0.004217116 -1.46584325 +#> 78 -0.62606452 -0.500831364 0.35233621 +#> 79 0.22130814 -1.054521838 -0.13425086 +#> 80 -0.82249462 -0.106502250 1.12587401 +#> 81 1.13659660 0.694685870 -0.63335312 +#> 82 0.63912932 -0.346001998 -0.15632373 +#> 83 0.50735668 -0.121686372 -0.35290927 +#> 84 0.95787156 0.663581444 -0.85623386 +#> 85 -0.91227314 -1.085103286 0.84931769 +#> 86 -0.23904032 0.820166860 0.16160248 +#> 87 -0.71341295 -0.149361316 0.56590330 +#> 88 1.64368977 -1.175172449 -2.12371033 +#> 89 -0.35317370 0.697507425 -0.12301334 +#> 90 -1.24671366 -0.243868992 1.74687093 +#> 91 -0.64148189 0.722090091 1.47996273 +#> 92 0.22026898 1.174067582 1.19864762 +#> 93 -0.88325604 0.271478223 0.35463827 +#> 94 0.07313435 1.405022266 
1.00772946 +#> 95 -1.47150701 0.560249257 1.03483886 +#> 96 -1.10044697 -1.187067944 0.66307881 +#> 97 -2.23696083 0.102519789 2.21376158 +#> 98 -0.92624102 -1.158465566 0.37674556 +#> 99 -1.39758231 0.256387690 1.30028559 +#> 100 -0.21576305 -0.512530549 -0.64474625
    +
    @@ -85,9 +85,6 @@ @@ -129,7 +126,7 @@

    Correlated Normal Vector

    -
    rnorm_pre(x, rho = 0, ymean = 0, ysd = 1)
    +
    rnorm_pre(x, mu = 0, sd = 1, r = 0)

    Arguments

    @@ -139,17 +136,17 @@

    Arg

    - - - - - + - + + + + +

    the existing vector

    rho

    desired correlation between existing and returned vectors

    ymeanmu

    desired mean of returned vector

    ysdsd

    desired SD of returned vector

    r

    desired correlation between existing and returned vectors

    Value

    @@ -159,7 +156,7 @@

    Value

    Examples

    v1 <- rnorm(10) -v2 <- rnorm_pre(v1, 0.5, 0, 1) +v2 <- rnorm_pre(v1, 0, 1, 0.5) cor(v1, v2)
    #> [1] 0.5
    @@ -85,9 +85,6 @@ @@ -129,17 +126,17 @@

    Select grouping and numeric columns and group

    -
    select_num_grp(dat, grp_by = NULL, cols = NULL)
    +
    select_num_grp(.data, between = c(), cols = NULL)

    Arguments

    - - + + - + @@ -150,25 +147,25 @@

    Arg

    Value

    -

    tibble

    +

    a tbl

    Examples

    -
    select_num_grp(iris, "Species")
    #> # A tibble: 150 x 5 -#> # Groups: Species [3] +
    select_num_grp(iris, "Species")
    #> # A tibble: 150 x 5 +#> # Groups: Species [3] #> Species Sepal.Length Sepal.Width Petal.Length Petal.Width -#> <fct> <dbl> <dbl> <dbl> <dbl> -#> 1 setosa 5.1 3.5 1.4 0.2 -#> 2 setosa 4.9 3 1.4 0.2 -#> 3 setosa 4.7 3.2 1.3 0.2 -#> 4 setosa 4.6 3.1 1.5 0.2 -#> 5 setosa 5 3.6 1.4 0.2 -#> 6 setosa 5.4 3.9 1.7 0.4 -#> 7 setosa 4.6 3.4 1.4 0.3 -#> 8 setosa 5 3.4 1.5 0.2 -#> 9 setosa 4.4 2.9 1.4 0.2 -#> 10 setosa 4.9 3.1 1.5 0.1 -#> # … with 140 more rows
    +#>
    <fct> <dbl> <dbl> <dbl> <dbl> +#> 1 setosa 5.1 3.5 1.4 0.2 +#> 2 setosa 4.9 3 1.4 0.2 +#> 3 setosa 4.7 3.2 1.3 0.2 +#> 4 setosa 4.6 3.1 1.5 0.2 +#> 5 setosa 5 3.6 1.4 0.2 +#> 6 setosa 5.4 3.9 1.7 0.4 +#> 7 setosa 4.6 3.4 1.4 0.3 +#> 8 setosa 5 3.4 1.5 0.2 +#> 9 setosa 4.4 2.9 1.4 0.2 +#> 10 setosa 4.9 3.1 1.5 0.1 +#> # … with 140 more rows
    @@ -85,9 +85,6 @@ @@ -125,12 +122,12 @@

    Simulate Data from Design

    -

    sim_design generates a dataframe with a specified within and between design

    +

    sim_design() generates a data table with a specified within and between design.

    -
    sim_design(within = list(), between = list(), n = 100, cors = 0,
    -  mu = 0, sd = 1, empirical = FALSE, frame_long = FALSE)
    +
    sim_design(within = list(), between = list(), n = 100, mu = 0,
    +  sd = 1, r = 0, empirical = FALSE, long = FALSE)

    Arguments

    dat

    the existing dataframe

    .data

    the existing tbl

    grp_bybetween

    an optional list of column names to group by

    @@ -147,10 +144,6 @@

    Arg

    - - - - @@ -159,19 +152,23 @@

    Arg

    + + + + - + - - + +
    n

    the number of samples required

    cors

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    mu

    a vector giving the means of the variables (numeric vector of length 1 or vars)

    sd

    the standard deviations of the variables (numeric vector of length 1 or vars)

    r

    the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)

    empirical

    logical. If true, mu, sd and cors specify the empirical not population mean, sd and covariance

    logical. If TRUE, mu, sd and r specify the empirical, not population, mean, sd and covariance

    frame_long

    Whether the returned dataframe is in wide (default = FALSE) or long (TRUE) format

    long

    Whether the returned tbl is in wide (default = FALSE) or long (TRUE) format

    Value

    -

    dataframe

    +

    a tbl

    diff --git a/docs/reference/sim_design_.html b/docs/reference/sim_design_.html index 5b6d3a0e..997e7e70 100644 --- a/docs/reference/sim_design_.html +++ b/docs/reference/sim_design_.html @@ -32,7 +32,7 @@ - + @@ -63,7 +63,7 @@ faux - 0.0.0.9005 + 0.0.0.9006 @@ -85,9 +85,6 @@ @@ -125,11 +122,11 @@

    Simulate Data from Design

    -

    sim_from_design generates a dataframe with a specified design

    +

    sim_design_() generates a data table with a specified design.

    -
    sim_design_(design, empirical = FALSE, frame_long = FALSE)
    +
    sim_design_(design, empirical = FALSE, long = FALSE)

    Arguments

    @@ -140,17 +137,17 @@

    Arg

    - + - - + +
    empirical

    logical. If true, mu, sd and cors specify the empirical not population mean, sd and covariance

    logical. If TRUE, mu, sd and r specify the empirical, not population, mean, sd and covariance

    frame_long

    Whether the returned dataframe is in wide (default = FALSE) or long (TRUE) format

    long

    Whether the returned tbl is in wide (default = FALSE) or long (TRUE) format

    Value

    -

    dataframe

    +

    a tbl

    diff --git a/docs/reference/sim_df.html b/docs/reference/sim_df.html new file mode 100644 index 00000000..e3ea6eb6 --- /dev/null +++ b/docs/reference/sim_df.html @@ -0,0 +1,194 @@ + + + + + + + + +Simulate an existing dataframe — sim_df • faux + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    sim_df() produces a data table with the same distributions and correlations as an existing data table. It only returns numeric columns and simulates all numeric variables from a continuous normal distribution (for now).

    + +
    + +
    sim_df(.data, n = 100, between = c(), empirical = FALSE,
    +  grp_by = NULL)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    .data

    the existing tbl (must be in wide format)

    n

    the number of samples to return per group

    between

    a list of the between-subject columns

    empirical

    logical. Passed on to rnorm_multi

    grp_by

    (deprecated; use between)

    + +

    Value

    + +

    a tbl

    + + +

    Examples

    +
    iris100 <- sim_df(iris, 100) +iris_species <- sim_df(iris, 100, between = "Species")
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    +
    + + + + + + diff --git a/docs/reference/sim_mixed_df.html b/docs/reference/sim_mixed_df.html new file mode 100644 index 00000000..36852235 --- /dev/null +++ b/docs/reference/sim_mixed_df.html @@ -0,0 +1,297 @@ + + + + + + + + +Generate a sample with random intercepts for subjects and items — sim_mixed_df • faux + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    sim_mixed_df() produces a data table with the same distributions of by-subject and by-item random intercepts as an existing data table.

    + +
    + +
    sim_mixed_df(.data, sub_n = 100, item_n = 25, dv = 1, sub_id = 2,
    +  item_id = 3)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    .data

    the existing tbl

    sub_n

    the number of subjects to simulate

    item_n

    the number of items to simulate

    dv

    the column name or index containing the DV

    sub_id

    the column name or index for the subject IDs

    item_id

    the column name or index for the item IDs

    + +

    Value

    + +

    a tbl

    + + +

    Examples

    +
    sim_mixed_df(faceratings, 10, 10, "rating", "rater_id", "face_id")
    #> sub_id item_id sub_i item_i dv +#> 1 1 1 0.15336871 0.1152507 3.4263347 +#> 2 2 1 -0.07546968 0.1152507 2.6552210 +#> 3 3 1 -0.09464565 0.1152507 2.5473771 +#> 4 4 1 0.13680669 0.1152507 4.5736075 +#> 5 5 1 0.46710613 0.1152507 4.6470300 +#> 6 6 1 0.71713231 0.1152507 5.0470723 +#> 7 7 1 -0.16377959 0.1152507 3.0286129 +#> 8 8 1 -0.03648029 0.1152507 3.0203508 +#> 9 9 1 -0.44490238 0.1152507 2.4007496 +#> 10 10 1 -0.50294582 0.1152507 1.4482325 +#> 11 1 2 0.15336871 0.9694343 4.2646276 +#> 12 2 2 -0.07546968 0.9694343 3.5117228 +#> 13 3 2 -0.09464565 0.9694343 1.9205807 +#> 14 4 2 0.13680669 0.9694343 3.3332175 +#> 15 5 2 0.46710613 0.9694343 4.1037056 +#> 16 6 2 0.71713231 0.9694343 4.8340290 +#> 17 7 2 -0.16377959 0.9694343 4.3931885 +#> 18 8 2 -0.03648029 0.9694343 3.5224828 +#> 19 9 2 -0.44490238 0.9694343 2.7403387 +#> 20 10 2 -0.50294582 0.9694343 4.9209943 +#> 21 1 3 0.15336871 -0.1809222 2.3955324 +#> 22 2 3 -0.07546968 -0.1809222 5.0649247 +#> 23 3 3 -0.09464565 -0.1809222 3.0032075 +#> 24 4 3 0.13680669 -0.1809222 2.4135067 +#> 25 5 3 0.46710613 -0.1809222 4.5656069 +#> 26 6 3 0.71713231 -0.1809222 2.8650467 +#> 27 7 3 -0.16377959 -0.1809222 3.6126806 +#> 28 8 3 -0.03648029 -0.1809222 0.9722492 +#> 29 9 3 -0.44490238 -0.1809222 0.6125408 +#> 30 10 3 -0.50294582 -0.1809222 3.8909402 +#> 31 1 4 0.15336871 1.0979794 5.9448229 +#> 32 2 4 -0.07546968 1.0979794 3.9574235 +#> 33 3 4 -0.09464565 1.0979794 4.8079004 +#> 34 4 4 0.13680669 1.0979794 5.0826263 +#> 35 5 4 0.46710613 1.0979794 3.3817108 +#> 36 6 4 0.71713231 1.0979794 3.2445244 +#> 37 7 4 -0.16377959 1.0979794 4.2746244 +#> 38 8 4 -0.03648029 1.0979794 4.7967650 +#> 39 9 4 -0.44490238 1.0979794 4.3737837 +#> 40 10 4 -0.50294582 1.0979794 4.2923638 +#> 41 1 5 0.15336871 0.2255086 5.9170173 +#> 42 2 5 -0.07546968 0.2255086 2.7538325 +#> 43 3 5 -0.09464565 0.2255086 3.1842397 +#> 44 4 5 0.13680669 0.2255086 4.2260520 +#> 45 5 5 0.46710613 0.2255086 1.5543152 +#> 46 6 5 0.71713231 0.2255086 
3.9556215 +#> 47 7 5 -0.16377959 0.2255086 3.8447343 +#> 48 8 5 -0.03648029 0.2255086 4.5385200 +#> 49 9 5 -0.44490238 0.2255086 2.6456965 +#> 50 10 5 -0.50294582 0.2255086 2.2025837 +#> 51 1 6 0.15336871 -0.5688417 3.5177725 +#> 52 2 6 -0.07546968 -0.5688417 2.9838297 +#> 53 3 6 -0.09464565 -0.5688417 3.5923231 +#> 54 4 6 0.13680669 -0.5688417 3.1022826 +#> 55 5 6 0.46710613 -0.5688417 2.7535176 +#> 56 6 6 0.71713231 -0.5688417 5.4940446 +#> 57 7 6 -0.16377959 -0.5688417 2.5234260 +#> 58 8 6 -0.03648029 -0.5688417 0.3511046 +#> 59 9 6 -0.44490238 -0.5688417 1.8498874 +#> 60 10 6 -0.50294582 -0.5688417 3.5379786 +#> 61 1 7 0.15336871 -0.1003512 4.2409599 +#> 62 2 7 -0.07546968 -0.1003512 3.5556705 +#> 63 3 7 -0.09464565 -0.1003512 2.6899611 +#> 64 4 7 0.13680669 -0.1003512 2.0439883 +#> 65 5 7 0.46710613 -0.1003512 5.0354057 +#> 66 6 7 0.71713231 -0.1003512 3.5521496 +#> 67 7 7 -0.16377959 -0.1003512 2.3525066 +#> 68 8 7 -0.03648029 -0.1003512 3.5415480 +#> 69 9 7 -0.44490238 -0.1003512 4.1344729 +#> 70 10 7 -0.50294582 -0.1003512 2.3269392 +#> 71 1 8 0.15336871 1.6989795 6.4250556 +#> 72 2 8 -0.07546968 1.6989795 5.7450023 +#> 73 3 8 -0.09464565 1.6989795 3.8872425 +#> 74 4 8 0.13680669 1.6989795 4.5973502 +#> 75 5 8 0.46710613 1.6989795 5.7349937 +#> 76 6 8 0.71713231 1.6989795 2.6852695 +#> 77 7 8 -0.16377959 1.6989795 3.7919020 +#> 78 8 8 -0.03648029 1.6989795 4.7810577 +#> 79 9 8 -0.44490238 1.6989795 5.1999969 +#> 80 10 8 -0.50294582 1.6989795 5.4375538 +#> 81 1 9 0.15336871 1.2452224 5.3180720 +#> 82 2 9 -0.07546968 1.2452224 2.7003936 +#> 83 3 9 -0.09464565 1.2452224 4.3759073 +#> 84 4 9 0.13680669 1.2452224 6.5338902 +#> 85 5 9 0.46710613 1.2452224 3.8544295 +#> 86 6 9 0.71713231 1.2452224 5.2503336 +#> 87 7 9 -0.16377959 1.2452224 4.6527757 +#> 88 8 9 -0.03648029 1.2452224 3.6883031 +#> 89 9 9 -0.44490238 1.2452224 2.8378979 +#> 90 10 9 -0.50294582 1.2452224 4.8626384 +#> 91 1 10 0.15336871 -0.2064970 4.7460439 +#> 92 2 10 -0.07546968 -0.2064970 1.8389187 
+#> 93 3 10 -0.09464565 -0.2064970 1.3021962 +#> 94 4 10 0.13680669 -0.2064970 2.4226626 +#> 95 5 10 0.46710613 -0.2064970 3.0711292 +#> 96 6 10 0.71713231 -0.2064970 2.2198113 +#> 97 7 10 -0.16377959 -0.2064970 0.9575364 +#> 98 8 10 -0.03648029 -0.2064970 2.6135275 +#> 99 9 10 -0.44490238 -0.2064970 1.2924662 +#> 100 10 10 -0.50294582 -0.2064970 0.8947152
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown 1.3.0.

    +
    +
    +
    + + + + + + diff --git a/docs/reference/simdf_mixed.html b/docs/reference/simdf_mixed.html index 1515559f..2dfb5c19 100644 --- a/docs/reference/simdf_mixed.html +++ b/docs/reference/simdf_mixed.html @@ -85,9 +85,6 @@ @@ -167,107 +164,107 @@

    Value

    Examples

    -
    simdf_mixed(faceratings, 10, 10, "rating", "rater_id", "face_id")
    #> sub_id item_id sub_i item_i dv -#> 1 1 1 0.7171917 -0.16440417 2.7076926 -#> 2 2 1 1.5872296 -0.16440417 4.0497225 -#> 3 3 1 -0.7289360 -0.16440417 2.8124829 -#> 4 4 1 0.6003483 -0.16440417 1.6454961 -#> 5 5 1 0.5163209 -0.16440417 2.8399589 -#> 6 6 1 0.2563833 -0.16440417 3.0883100 -#> 7 7 1 -0.7781445 -0.16440417 3.0922434 -#> 8 8 1 1.1424562 -0.16440417 3.7438529 -#> 9 9 1 0.1711736 -0.16440417 1.9164980 -#> 10 10 1 1.0828005 -0.16440417 2.1094020 -#> 11 1 2 0.7171917 0.37636537 6.5942167 -#> 12 2 2 1.5872296 0.37636537 3.8486785 -#> 13 3 2 -0.7289360 0.37636537 3.4411992 -#> 14 4 2 0.6003483 0.37636537 5.3235771 -#> 15 5 2 0.5163209 0.37636537 4.2771207 -#> 16 6 2 0.2563833 0.37636537 3.3479907 -#> 17 7 2 -0.7781445 0.37636537 2.7002602 -#> 18 8 2 1.1424562 0.37636537 4.1520841 -#> 19 9 2 0.1711736 0.37636537 5.1249236 -#> 20 10 2 1.0828005 0.37636537 4.4809776 -#> 21 1 3 0.7171917 -0.82422093 3.2861012 -#> 22 2 3 1.5872296 -0.82422093 2.8786693 -#> 23 3 3 -0.7289360 -0.82422093 1.9902052 -#> 24 4 3 0.6003483 -0.82422093 2.0458923 -#> 25 5 3 0.5163209 -0.82422093 1.5216236 -#> 26 6 3 0.2563833 -0.82422093 3.3659935 -#> 27 7 3 -0.7781445 -0.82422093 2.9983831 -#> 28 8 3 1.1424562 -0.82422093 3.4864340 -#> 29 9 3 0.1711736 -0.82422093 2.7199936 -#> 30 10 3 1.0828005 -0.82422093 4.6086848 -#> 31 1 4 0.7171917 -0.91128136 1.6789966 -#> 32 2 4 1.5872296 -0.91128136 5.6968101 -#> 33 3 4 -0.7289360 -0.91128136 -1.0487718 -#> 34 4 4 0.6003483 -0.91128136 2.0597989 -#> 35 5 4 0.5163209 -0.91128136 2.3732315 -#> 36 6 4 0.2563833 -0.91128136 4.2745678 -#> 37 7 4 -0.7781445 -0.91128136 1.8469632 -#> 38 8 4 1.1424562 -0.91128136 4.1014591 -#> 39 9 4 0.1711736 -0.91128136 1.0318854 -#> 40 10 4 1.0828005 -0.91128136 4.2240532 -#> 41 1 5 0.7171917 1.66678147 5.0273318 -#> 42 2 5 1.5872296 1.66678147 3.5821303 -#> 43 3 5 -0.7289360 1.66678147 5.4538830 -#> 44 4 5 0.6003483 1.66678147 7.9807309 -#> 45 5 5 0.5163209 1.66678147 3.7473337 -#> 46 6 5 0.2563833 1.66678147 
7.1439931 -#> 47 7 5 -0.7781445 1.66678147 4.8426279 -#> 48 8 5 1.1424562 1.66678147 6.6205912 -#> 49 9 5 0.1711736 1.66678147 6.7093920 -#> 50 10 5 1.0828005 1.66678147 4.3002833 -#> 51 1 6 0.7171917 -0.32253158 3.5070761 -#> 52 2 6 1.5872296 -0.32253158 4.5057448 -#> 53 3 6 -0.7289360 -0.32253158 0.9024044 -#> 54 4 6 0.6003483 -0.32253158 3.9435972 -#> 55 5 6 0.5163209 -0.32253158 4.3432880 -#> 56 6 6 0.2563833 -0.32253158 1.5616119 -#> 57 7 6 -0.7781445 -0.32253158 1.5701492 -#> 58 8 6 1.1424562 -0.32253158 1.8015241 -#> 59 9 6 0.1711736 -0.32253158 2.9668905 -#> 60 10 6 1.0828005 -0.32253158 4.5541003 -#> 61 1 7 0.7171917 0.06766548 3.6839723 -#> 62 2 7 1.5872296 0.06766548 3.5298722 -#> 63 3 7 -0.7289360 0.06766548 1.3879482 -#> 64 4 7 0.6003483 0.06766548 3.0134067 -#> 65 5 7 0.5163209 0.06766548 3.8805034 -#> 66 6 7 0.2563833 0.06766548 2.8650564 -#> 67 7 7 -0.7781445 0.06766548 3.9055132 -#> 68 8 7 1.1424562 0.06766548 5.1346467 -#> 69 9 7 0.1711736 0.06766548 2.2637057 -#> 70 10 7 1.0828005 0.06766548 3.8728360 -#> 71 1 8 0.7171917 -0.41971766 4.1334290 -#> 72 2 8 1.5872296 -0.41971766 4.1028115 -#> 73 3 8 -0.7289360 -0.41971766 2.0302724 -#> 74 4 8 0.6003483 -0.41971766 2.8500859 -#> 75 5 8 0.5163209 -0.41971766 1.2627937 -#> 76 6 8 0.2563833 -0.41971766 2.0276961 -#> 77 7 8 -0.7781445 -0.41971766 0.9953581 -#> 78 8 8 1.1424562 -0.41971766 4.0599506 -#> 79 9 8 0.1711736 -0.41971766 3.1317760 -#> 80 10 8 1.0828005 -0.41971766 3.4296845 -#> 81 1 9 0.7171917 -0.09253167 2.7561118 -#> 82 2 9 1.5872296 -0.09253167 4.7486546 -#> 83 3 9 -0.7289360 -0.09253167 0.9585535 -#> 84 4 9 0.6003483 -0.09253167 3.6791317 -#> 85 5 9 0.5163209 -0.09253167 2.7224103 -#> 86 6 9 0.2563833 -0.09253167 2.6014336 -#> 87 7 9 -0.7781445 -0.09253167 0.6113194 -#> 88 8 9 1.1424562 -0.09253167 5.3929322 -#> 89 9 9 0.1711736 -0.09253167 3.5604510 -#> 90 10 9 1.0828005 -0.09253167 3.8776255 -#> 91 1 10 0.7171917 1.38999715 5.8347359 -#> 92 2 10 1.5872296 1.38999715 6.5293892 -#> 93 3 10 
-0.7289360 1.38999715 4.4470914 -#> 94 4 10 0.6003483 1.38999715 5.8856564 -#> 95 5 10 0.5163209 1.38999715 4.7294257 -#> 96 6 10 0.2563833 1.38999715 5.8323700 -#> 97 7 10 -0.7781445 1.38999715 3.5409873 -#> 98 8 10 1.1424562 1.38999715 6.7181566 -#> 99 9 10 0.1711736 1.38999715 4.5778140 -#> 100 10 10 1.0828005 1.38999715 4.5562784
    +
    simdf_mixed(faceratings, 10, 10, "rating", "rater_id", "face_id")
    #> sub_id item_id sub_i item_i dv +#> 1 1 1 0.15336871 0.1152507 3.4263347 +#> 2 2 1 -0.07546968 0.1152507 2.6552210 +#> 3 3 1 -0.09464565 0.1152507 2.5473771 +#> 4 4 1 0.13680669 0.1152507 4.5736075 +#> 5 5 1 0.46710613 0.1152507 4.6470300 +#> 6 6 1 0.71713231 0.1152507 5.0470723 +#> 7 7 1 -0.16377959 0.1152507 3.0286129 +#> 8 8 1 -0.03648029 0.1152507 3.0203508 +#> 9 9 1 -0.44490238 0.1152507 2.4007496 +#> 10 10 1 -0.50294582 0.1152507 1.4482325 +#> 11 1 2 0.15336871 0.9694343 4.2646276 +#> 12 2 2 -0.07546968 0.9694343 3.5117228 +#> 13 3 2 -0.09464565 0.9694343 1.9205807 +#> 14 4 2 0.13680669 0.9694343 3.3332175 +#> 15 5 2 0.46710613 0.9694343 4.1037056 +#> 16 6 2 0.71713231 0.9694343 4.8340290 +#> 17 7 2 -0.16377959 0.9694343 4.3931885 +#> 18 8 2 -0.03648029 0.9694343 3.5224828 +#> 19 9 2 -0.44490238 0.9694343 2.7403387 +#> 20 10 2 -0.50294582 0.9694343 4.9209943 +#> 21 1 3 0.15336871 -0.1809222 2.3955324 +#> 22 2 3 -0.07546968 -0.1809222 5.0649247 +#> 23 3 3 -0.09464565 -0.1809222 3.0032075 +#> 24 4 3 0.13680669 -0.1809222 2.4135067 +#> 25 5 3 0.46710613 -0.1809222 4.5656069 +#> 26 6 3 0.71713231 -0.1809222 2.8650467 +#> 27 7 3 -0.16377959 -0.1809222 3.6126806 +#> 28 8 3 -0.03648029 -0.1809222 0.9722492 +#> 29 9 3 -0.44490238 -0.1809222 0.6125408 +#> 30 10 3 -0.50294582 -0.1809222 3.8909402 +#> 31 1 4 0.15336871 1.0979794 5.9448229 +#> 32 2 4 -0.07546968 1.0979794 3.9574235 +#> 33 3 4 -0.09464565 1.0979794 4.8079004 +#> 34 4 4 0.13680669 1.0979794 5.0826263 +#> 35 5 4 0.46710613 1.0979794 3.3817108 +#> 36 6 4 0.71713231 1.0979794 3.2445244 +#> 37 7 4 -0.16377959 1.0979794 4.2746244 +#> 38 8 4 -0.03648029 1.0979794 4.7967650 +#> 39 9 4 -0.44490238 1.0979794 4.3737837 +#> 40 10 4 -0.50294582 1.0979794 4.2923638 +#> 41 1 5 0.15336871 0.2255086 5.9170173 +#> 42 2 5 -0.07546968 0.2255086 2.7538325 +#> 43 3 5 -0.09464565 0.2255086 3.1842397 +#> 44 4 5 0.13680669 0.2255086 4.2260520 +#> 45 5 5 0.46710613 0.2255086 1.5543152 +#> 46 6 5 0.71713231 0.2255086 
3.9556215 +#> 47 7 5 -0.16377959 0.2255086 3.8447343 +#> 48 8 5 -0.03648029 0.2255086 4.5385200 +#> 49 9 5 -0.44490238 0.2255086 2.6456965 +#> 50 10 5 -0.50294582 0.2255086 2.2025837 +#> 51 1 6 0.15336871 -0.5688417 3.5177725 +#> 52 2 6 -0.07546968 -0.5688417 2.9838297 +#> 53 3 6 -0.09464565 -0.5688417 3.5923231 +#> 54 4 6 0.13680669 -0.5688417 3.1022826 +#> 55 5 6 0.46710613 -0.5688417 2.7535176 +#> 56 6 6 0.71713231 -0.5688417 5.4940446 +#> 57 7 6 -0.16377959 -0.5688417 2.5234260 +#> 58 8 6 -0.03648029 -0.5688417 0.3511046 +#> 59 9 6 -0.44490238 -0.5688417 1.8498874 +#> 60 10 6 -0.50294582 -0.5688417 3.5379786 +#> 61 1 7 0.15336871 -0.1003512 4.2409599 +#> 62 2 7 -0.07546968 -0.1003512 3.5556705 +#> 63 3 7 -0.09464565 -0.1003512 2.6899611 +#> 64 4 7 0.13680669 -0.1003512 2.0439883 +#> 65 5 7 0.46710613 -0.1003512 5.0354057 +#> 66 6 7 0.71713231 -0.1003512 3.5521496 +#> 67 7 7 -0.16377959 -0.1003512 2.3525066 +#> 68 8 7 -0.03648029 -0.1003512 3.5415480 +#> 69 9 7 -0.44490238 -0.1003512 4.1344729 +#> 70 10 7 -0.50294582 -0.1003512 2.3269392 +#> 71 1 8 0.15336871 1.6989795 6.4250556 +#> 72 2 8 -0.07546968 1.6989795 5.7450023 +#> 73 3 8 -0.09464565 1.6989795 3.8872425 +#> 74 4 8 0.13680669 1.6989795 4.5973502 +#> 75 5 8 0.46710613 1.6989795 5.7349937 +#> 76 6 8 0.71713231 1.6989795 2.6852695 +#> 77 7 8 -0.16377959 1.6989795 3.7919020 +#> 78 8 8 -0.03648029 1.6989795 4.7810577 +#> 79 9 8 -0.44490238 1.6989795 5.1999969 +#> 80 10 8 -0.50294582 1.6989795 5.4375538 +#> 81 1 9 0.15336871 1.2452224 5.3180720 +#> 82 2 9 -0.07546968 1.2452224 2.7003936 +#> 83 3 9 -0.09464565 1.2452224 4.3759073 +#> 84 4 9 0.13680669 1.2452224 6.5338902 +#> 85 5 9 0.46710613 1.2452224 3.8544295 +#> 86 6 9 0.71713231 1.2452224 5.2503336 +#> 87 7 9 -0.16377959 1.2452224 4.6527757 +#> 88 8 9 -0.03648029 1.2452224 3.6883031 +#> 89 9 9 -0.44490238 1.2452224 2.8378979 +#> 90 10 9 -0.50294582 1.2452224 4.8626384 +#> 91 1 10 0.15336871 -0.2064970 4.7460439 +#> 92 2 10 -0.07546968 -0.2064970 1.8389187 
+#> 93 3 10 -0.09464565 -0.2064970 1.3021962 +#> 94 4 10 0.13680669 -0.2064970 2.4226626 +#> 95 5 10 0.46710613 -0.2064970 3.0711292 +#> 96 6 10 0.71713231 -0.2064970 2.2198113 +#> 97 7 10 -0.16377959 -0.2064970 0.9575364 +#> 98 8 10 -0.03648029 -0.2064970 2.6135275 +#> 99 9 10 -0.44490238 -0.2064970 1.2924662 +#> 100 10 10 -0.50294582 -0.2064970 0.8947152