Skip to content

Commit

Permalink
Merge pull request #4 from debruine/dev
Browse files Browse the repository at this point in the history
Cleaned up documentation and arguments
  • Loading branch information
debruine authored May 3, 2019
2 parents 42bbc5a + 74d8d42 commit ab48367
Show file tree
Hide file tree
Showing 88 changed files with 3,993 additions and 2,235 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: faux
Title: Simulation Functions
Version: 0.0.0.9005
Date: 2019-04-29
Authors@R: person("Lisa", "DeBruine", email = "[email protected]", role = c("aut", "cre"))
Version: 0.0.0.9006
Date: 2019-05-02
Authors@R: person(given = "Lisa", family = "DeBruine", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-7523-5539"))
Description: Provides functions for simulating multiple variables with specified relationships.
Depends:
R (>= 3.2.4)
Expand All @@ -19,7 +19,7 @@ Imports:
ggplot2
License: MIT + file LICENSE
Suggests:
testthat,
testthat (>= 2.1.0),
knitr,
rmarkdown
VignetteBuilder: knitr
Expand Down
8 changes: 6 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@ export(check_design)
export(check_sim_stats)
export(cormat)
export(cormat_from_triangle)
export(get_design_long)
export(is_pos_def)
export(long2wide)
export(make_id)
export(pos_def_limits)
export(rnorm_multi)
export(rnorm_pre)
export(select_num_grp)
export(sim_design)
export(simdf)
export(simdf_mixed)
export(sim_df)
export(sim_mixed_df)
export(wide2long)
importFrom(magrittr,"%>%")
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@

# faux 0.0.0.9005

* Bug fixes for `sim_design()` (failed when within or between factor number was 0)
* Bug fixes for `sim_design()` (failed when within or between factor number was 0)

# faux 0.0.0.9006

* Changes to argument order and names (more consistent, but may break old scripts)
* Updated vignettes
323 changes: 323 additions & 0 deletions R/check_design.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,323 @@
#' Validate design
#'
#' \code{check_design} validates the specified within and between design
#'
#' @param within a list of the within-subject factors
#' @param between a list of the between-subject factors
#' @param n the number of samples required
#' @param mu a vector giving the means of the variables (numeric vector of length 1 or vars)
#' @param sd the standard deviations of the variables (numeric vector of length 1 or vars)
#' @param r the correlations among the variables (can be a single number, vars\*vars matrix, vars\*vars vector, or a vars\*(vars-1)/2 vector)
#'
#' @return list
#'
#' @examples
#'
#' within <- list(time = c("day", "night"))
#' between <- list(pet = c("dog", "cat"))
#' check_design(within, between)
#'
#' @export
#'
check_design <- function(within = list(), between = list(),
n = 100, mu = 0, sd = 1, r = 0) {
# name anonymous factors
if (is.numeric(within) && within %in% 2:10 %>% mean() == 1) { # vector of level numbers
within_names <- LETTERS[1:length(within)]
within <- purrr::map2(within_names, within, ~paste0(.x, 1:.y))
names(within) <- within_names
}
if (is.numeric(between) && between %in% 2:10 %>% mean() == 1) { # vector of level numbers
between_names <- LETTERS[(length(within)+1):(length(within)+length(between))]
between <- purrr::map2(between_names, between, ~paste0(.x, 1:.y))
names(between) <- between_names
}

# check factor specification
if (!is.list(within) || !is.list(between)) {
stop("within and between must be lists")
} else if (length(within) == 0 && length(between) == 0) {
stop("You must specify at least one factor")
}

# if within or between factors are named vectors,
# use their names as column names and values as labels for plots
between_labels <- purrr::map(between, fix_name_labels)
between <- lapply(between_labels, names)
within_labels <- purrr::map(within, fix_name_labels)
within <- lapply(within_labels, names)

within_factors <- names(within)
between_factors <- names(between)

# handle no w/in or btwn
if (length(between_factors) == 0) between_factors <- ".tmpvar."
if (length(within_factors) == 0) within_factors <- ".tmpvar."

# check for duplicate factor names
factor_overlap <- intersect(within_factors, between_factors)
if (length(factor_overlap)) {
stop("You have multiple factors with the same name (",
paste(factor_overlap, collapse = ", "),
"). Please give all factors unique names.")
}

# check for duplicate level names within any factor
dupes <- c(within, between) %>%
lapply(duplicated) %>%
lapply(sum) %>%
lapply(as.logical) %>%
unlist()

if (sum(dupes)) {
dupelevels <- c(within, between) %>%
names() %>%
magrittr::extract(dupes) %>%
paste(collapse = ", ")
stop("You have duplicate levels for factor(s): ", dupelevels)
}

# define columns
if (length(within) == 0) {
cells_w = "val"
} else {
cells_w <- do.call(expand.grid, within) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")
}
if (length(between) == 0) {
cells_b = ".tmpvar."
} else {
cells_b <- do.call(expand.grid, between) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")
}

# convert n, mu and sd from vector/list formats
cell_n <- convert_param(n, cells_w, cells_b, "Ns")
cell_mu <- convert_param(mu, cells_w, cells_b, "means")
cell_sd <- convert_param(sd, cells_w, cells_b, "SDs")

# figure out number of subjects and their IDs
sub_n <- sum(cell_n[1,])
sub_id <- make_id(sub_n)

# set up cell correlations from r (number, vector, matrix or list styles)
cell_r <- list()
if (length(within)) {
for (cell in cells_b) {
cell_cor <- if(is.list(r)) r[[cell]] else r
cell_r[[cell]] <- cormat(cell_cor, length(cells_w))
}
}

list(
within = within,
between = between,
within_factors = within_factors,
between_factors = between_factors,
within_labels = within_labels,
between_labels = between_labels,
cells_w = cells_w,
cells_b = cells_b,
cell_n = cell_n,
cell_mu = cell_mu,
cell_sd = cell_sd,
cell_r = cell_r,
sub_id = sub_id
)
}

#' Convert parameter
#'
#' Converts parameter specification from vector or list format
#'
#' @param param the parameter (mu, sd, or n)
#' @param cells_w a list of within-subject cells combinations
#' @param cells_b a list of between-subject cell combinations
#' @param type the name of the parameter (for error messages)
#'
#' @return a data frame
#' @keywords internal
#'
convert_param <- function (param, cells_w, cells_b, type = "this parameter") {
w_n <- length(cells_w)
b_n <- length(cells_b)
all_n <- b_n*w_n

if (is.data.frame(param)) { # convert to list first
# check for row/column confusion
cols_are_b <- setdiff(names(param), cells_b) %>% length() == 0
rows_are_w <- setdiff(rownames(param), cells_w) %>% length() == 0
cols_are_w <- setdiff(names(param), cells_w) %>% length() == 0
rows_are_b <- setdiff(rownames(param), cells_b) %>% length() == 0
if (cols_are_b && rows_are_w) {
# check this first in case rows and cols are the same labels
param <- as.list(param) %>% lapply(magrittr::set_names, rownames(param))
} else if (cols_are_w && rows_are_b) {
param <- t(param) %>% as.data.frame()
param <- as.list(param) %>% lapply(magrittr::set_names, rownames(param))
} else {
stop("The ", type, " dataframe is misspecified.")
}
}

if (is.list(param)) {
param2 <- c()
# add param in right order
for (f in cells_b) {
if (length(param[[f]]) == 1) {
new_param <- rep(param[[f]], w_n)
} else if (length(param[[f]]) != w_n) {
stop("The number of ", type, " for cell ", f,
" is not correct. Please specify either 1 or a vector of ",
w_n, " per cell")
} else if (setdiff(cells_w, names(param[[f]])) %>% length() == 0) {
new_param <- param[[f]][cells_w] # add named parameters in the right order
} else {
new_param <- param[[f]] # parameters are not or incorrectly named, add in this order
}
param2 <- c(param2, new_param)
}

if (length(cells_b) == 0) { # no between-subject factors
message("no between-subject factors")
if (length(param) == 1) {
param2 <- rep(param, w_n)
} else if (length(param) != w_n) {
stop("The number of ", type,
" is not correct. Please specify either 1 or a vector of ",
w_n, " per cell")
} else if (setdiff(cells_w, names(param)) %>% length() == 0) {
param2 <- param[cells_w] # add named parameters in the right order
} else {
param2 <- param # parameters are not or incorrectly named, add in this order
}
}
} else if (is.numeric(param)) {
if (length(param) == 1) {
param2 <- rep(param, all_n)
} else if (length(param) == all_n) {
param2 <- param
} else {
stop("The number of ", type, " is not correct. Please specify 1, a vector of ",
all_n , ", or use the list format")
}
}

dd <- matrix(param2, ncol = b_n) %>% as.data.frame()
names(dd) <- cells_b
rownames(dd) <- cells_w

dd
}


#' Get design from long data
#'
#' Makes a best guess at the design of a long-format data frame.
#' Finds all columns that contain a single value per unit of analysis (between factors),
#' all columns that contain the same values per unit of analysis (within factors), and
#' all columns that differ over units of analysis (dv, continuous factors)
#'
#' @param .data the data frame (in long format)
#' @param id the column name(s) that identify a unit of analysis
#' @param dv the column name that identifies the DV
#'
#' @return the data frame in long format
#'
#' @export
#'
get_design_long <- function(.data, id = "sub_id", dv = "val") {
between_factors <- .data %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
dplyr::summarise_all(dplyr::n_distinct) %>%
dplyr::ungroup() %>%
dplyr::select(-tidyselect::one_of(id)) %>%
dplyr::summarise_all(max) %>%
dplyr::select_if(~ . == 1) %>%
names()

within_factors <- .data %>%
dplyr::select(-tidyselect::one_of(between_factors)) %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
dplyr::summarise_all(paste0, collapse = ",") %>%
dplyr::ungroup() %>%
dplyr::select(-tidyselect::one_of(id)) %>%
dplyr::summarise_all(dplyr::n_distinct) %>%
dplyr::select_if(~ . == 1) %>%
names()

within <- .data %>%
dplyr::select(tidyselect::one_of(within_factors)) %>%
dplyr::mutate_all(as.factor) %>%
dplyr::summarise_all(~levels(.) %>% paste0(collapse = ".|.")) %>%
as.list() %>%
sapply(strsplit, split=".|.", fixed = TRUE)

between <- .data %>%
dplyr::select(tidyselect::one_of(between_factors)) %>%
dplyr::mutate_all(as.factor) %>%
dplyr::summarise_all(~levels(.) %>% paste0(collapse = ".|.")) %>%
as.list() %>%
sapply(strsplit, split=".|.", fixed = TRUE)

between_labels <- purrr::map(between, fix_name_labels)
within_labels <- purrr::map(within, fix_name_labels)

cells_b <- do.call(expand.grid, between) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")

cells_w <- do.call(expand.grid, within) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")

# get n, mu, sd, r per cell
chk <- check_sim_stats(.data, between_factors, within_factors, dv, id)

n <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, n) %>%
tidyr::spread(var, n) %>%
tibble::column_to_rownames(".within")

mu <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, mean) %>%
tidyr::spread(var, mean) %>%
tibble::column_to_rownames(".within")

sd <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, sd) %>%
tidyr::spread(var, sd) %>%
tibble::column_to_rownames(".within")

cors <- chk %>%
tidyr::unite(".between", tidyselect::one_of(between_factors)) %>%
dplyr::select(tidyselect::one_of(c(".between", "var", cells_w))) %>%
dplyr::mutate(var = forcats::fct_relevel(var, cells_w)) %>%
dplyr::arrange(var) %>%
dplyr::group_by(.between) %>%
tidyr::nest(.key = "r") %>%
as.list()

r <- purrr::map(cors$r, ~tibble::column_to_rownames(., "var"))
names(r) <- cors$.between

design <- list(
within = within,
between = between,
within_factors = within_factors,
between_factors = between_factors,
within_labels = within_labels,
between_labels = between_labels,
cells_w = cells_w,
cells_b = cells_b,
cell_n = n,
cell_mu = mu,
cell_sd = sd,
cell_r = r,
sub_id = id
)

design
}

Loading

0 comments on commit ab48367

Please sign in to comment.