Skip to content

Commit

Permalink
Make everything cleaner
Browse files Browse the repository at this point in the history
  • Loading branch information
debruine committed May 3, 2019
1 parent ea2a335 commit 74d8d42
Show file tree
Hide file tree
Showing 51 changed files with 1,450 additions and 445 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Imports:
ggplot2
License: MIT + file LICENSE
Suggests:
testthat,
testthat (>= 2.1.0),
knitr,
rmarkdown
VignetteBuilder: knitr
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(check_design)
export(check_sim_stats)
export(cormat)
export(cormat_from_triangle)
export(get_design_long)
export(is_pos_def)
export(long2wide)
export(make_id)
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@

# faux 0.0.0.9005

* Bug fixes for `sim_design()` (failed when within or between factor number was 0)
* Bug fixes for `sim_design()` (failed when within or between factor number was 0)

# faux 0.0.0.9006

* Changes to argument order and names (more consistent, but may break old scripts)
* Updated vignettes
118 changes: 115 additions & 3 deletions R/check_design.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#'
#' within <- list(time = c("day", "night"))
#' between <- list(pet = c("dog", "cat"))
#' design <- check_design(within, between)
#' check_design(within, between)
#'
#' @export
#'
Expand Down Expand Up @@ -116,12 +116,12 @@ check_design <- function(within = list(), between = list(),
between_factors = between_factors,
within_labels = within_labels,
between_labels = between_labels,
cells_w = cells_w,
cells_b = cells_b,
cell_n = cell_n,
cell_mu = cell_mu,
cell_sd = cell_sd,
cell_r = cell_r,
cells_w = cells_w,
cells_b = cells_b,
sub_id = sub_id
)
}
Expand Down Expand Up @@ -209,3 +209,115 @@ convert_param <- function (param, cells_w, cells_b, type = "this parameter") {

dd
}


#' Get design from long data
#'
#' Makes a best guess at the design of a long-format data frame.
#' Finds all columns that contain a single value per unit of analysis (between factors),
#' all columns that contain the same values per unit of analysis (within factors), and
#' all columns that differ over units of analysis (dv, continuous factors)
#'
#' @param .data the data frame (in long format)
#' @param id the column name(s) that identify a unit of analysis
#' @param dv the column name that identifies the DV
#'
#' @return the data frame in long format
#'
#' @export
#'
get_design_long <- function(.data, id = "sub_id", dv = "val") {
between_factors <- .data %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
dplyr::summarise_all(dplyr::n_distinct) %>%
dplyr::ungroup() %>%
dplyr::select(-tidyselect::one_of(id)) %>%
dplyr::summarise_all(max) %>%
dplyr::select_if(~ . == 1) %>%
names()

within_factors <- .data %>%
dplyr::select(-tidyselect::one_of(between_factors)) %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(id))) %>%
dplyr::summarise_all(paste0, collapse = ",") %>%
dplyr::ungroup() %>%
dplyr::select(-tidyselect::one_of(id)) %>%
dplyr::summarise_all(dplyr::n_distinct) %>%
dplyr::select_if(~ . == 1) %>%
names()

within <- .data %>%
dplyr::select(tidyselect::one_of(within_factors)) %>%
dplyr::mutate_all(as.factor) %>%
dplyr::summarise_all(~levels(.) %>% paste0(collapse = ".|.")) %>%
as.list() %>%
sapply(strsplit, split=".|.", fixed = TRUE)

between <- .data %>%
dplyr::select(tidyselect::one_of(between_factors)) %>%
dplyr::mutate_all(as.factor) %>%
dplyr::summarise_all(~levels(.) %>% paste0(collapse = ".|.")) %>%
as.list() %>%
sapply(strsplit, split=".|.", fixed = TRUE)

between_labels <- purrr::map(between, fix_name_labels)
within_labels <- purrr::map(within, fix_name_labels)

cells_b <- do.call(expand.grid, between) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")

cells_w <- do.call(expand.grid, within) %>%
tidyr::unite("b", 1:ncol(.)) %>% dplyr::pull("b")

# get n, mu, sd, r per cell
chk <- check_sim_stats(.data, between_factors, within_factors, dv, id)

n <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, n) %>%
tidyr::spread(var, n) %>%
tibble::column_to_rownames(".within")

mu <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, mean) %>%
tidyr::spread(var, mean) %>%
tibble::column_to_rownames(".within")

sd <- chk %>%
tidyr::unite(".within", tidyselect::one_of(between_factors)) %>%
dplyr::select(.within, var, sd) %>%
tidyr::spread(var, sd) %>%
tibble::column_to_rownames(".within")

cors <- chk %>%
tidyr::unite(".between", tidyselect::one_of(between_factors)) %>%
dplyr::select(tidyselect::one_of(c(".between", "var", cells_w))) %>%
dplyr::mutate(var = forcats::fct_relevel(var, cells_w)) %>%
dplyr::arrange(var) %>%
dplyr::group_by(.between) %>%
tidyr::nest(.key = "r") %>%
as.list()

r <- purrr::map(cors$r, ~tibble::column_to_rownames(., "var"))
names(r) <- cors$.between

design <- list(
within = within,
between = between,
within_factors = within_factors,
between_factors = between_factors,
within_labels = within_labels,
between_labels = between_labels,
cells_w = cells_w,
cells_b = cells_b,
cell_n = n,
cell_mu = mu,
cell_sd = sd,
cell_r = r,
sub_id = id
)

design
}

10 changes: 5 additions & 5 deletions R/check_sim_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,30 @@
#'
#' \code{check_sim_stats} Generates a table of the correlations and means of numeric columns in a data frame
#'
#' @param dat the existing dataframe
#' @param .data the existing tbl
#' @param between a vector of column names for between-subject factors
#' @param within a vector of column names for within-subject factors (if data is long)
#' @param dv the column name of the dv (if data is long)
#' @param id the column name(s) of the subject ID (if data is long)
#' @param digits how many digits to round to (default = 2)
#' @param usekable logical. If TRUE, output with knitr::kable
#'
#' @return tibble or kable
#' @return a tbl or kable
#' @examples
#' check_sim_stats(iris, "Species")
#' @export
#'

check_sim_stats <- function(dat, between = c(), within = c(), dv = c(), id = c(),
check_sim_stats <- function(.data, between = c(), within = c(), dv = c(), id = c(),
digits = 2, usekable = FALSE) {

if (length(within) && length(dv) && length(id)) {
# convert long to wide
dat <- long2wide(dat, within, between, dv, id) %>%
.data <- long2wide(.data, within, between, dv, id) %>%
dplyr::select(-tidyselect::one_of(id))
}

grpdat <- select_num_grp(dat, between)
grpdat <- select_num_grp(.data, between)
grpvars <- dplyr::group_vars(grpdat)
numvars <- names(grpdat)[!names(grpdat) %in% grpvars]

Expand Down
36 changes: 24 additions & 12 deletions R/long2wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@
#'
#' Converts data from long format to wide
#'
#' @param dat the long data frame to convert
#' @param .data the tbl in long format
#' @param within the names of the within column(s)
#' @param between the names of between column(s) (optional)
#' @param dv the name of the DV (value) column
#' @param id the names of the column(s) for grouping observations
#'
#' @return the data frame in wide format
#' @return a tbl in wide format
#'
#' @examples
#' df_long <- sim_design(2, 2, long = TRUE)
#' long2wide(df_long, "A", "B", "val", "sub_id")
#'
#' @export
#'
long2wide <- function(dat, within = c(), between = c(), dv = c(), id = c()) {
dat %>%
long2wide <- function(.data, within = c(), between = c(), dv = "val", id = "sub_id") {
.data %>%
dplyr::select(tidyselect::one_of(c(id, between, within, dv))) %>%
tidyr::unite(".tmpwithin.", tidyselect::one_of(within)) %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(between))) %>%
Expand All @@ -25,17 +29,25 @@ long2wide <- function(dat, within = c(), between = c(), dv = c(), id = c()) {
#'
#' Converts data from wide format to long
#'
#' @param dat the wide data frame to convert
#' @param within the names of the within factors
#' @param dv the names of the DV (value) columns
#' @param sep Separator between columns (see tidyr::separate)
#' @param .data the tbl in wide format
#' @param within_factors the names of the within factors
#' @param within_cols the names (or indices) of the within-subject (value) columns
#' @param sep Separator for within-columns (see tidyr::separate)
#'
#' @return a tbl in long format
#'
#' @return the data frame in long format
#' @examples
#' wide2long(iris, c("Feature", "Measure"), 1:4)
#'
#' @export
#'
wide2long <- function(dat, within_factors = c(), within_cols = c(), sep = "[^[:alnum:]]+") {
dat %>%
wide2long <- function(.data, within_factors = c(), within_cols = c(), sep = "[^[:alnum:]]+") {
if (is.numeric(within_cols)) {
within_cols <- names(.data)[within_cols]
}

.data %>%
tidyr::gather(".tmpwithin.", "val", tidyselect::one_of(within_cols)) %>%
tidyr::separate(".tmpwithin.", within_factors, sep = sep)
}
}

20 changes: 15 additions & 5 deletions R/make_id.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
#' Make ID
#'
#' Make IDs with fixed length and a letter prefix for random effects (e.g., S001, S002, ..., S100).
#' @param n the number of IDs to generate
#' @param n the number of IDs to generate (or a vector of numbers)
#' @param prefix the letter prefix to the number
#' @param digits the number of digits to use for the numeric part. Only used if this is larger than the number of digits in n.
#'
#' @return a vector of IDs
#' @export
#'
#' @examples
#'
#' make_id(20, "SUBJECT_")
#' make_id(10:30, digits = 3)
#'
make_id <- function(n = 100, prefix = "S") {
max_digits <- floor(log10(n))+1
paste0(prefix, formatC(1:n, width = max_digits, flag = "0"))
}
make_id <- function(n = 100, prefix = "S", digits = 0) {
# set max digits to the larger of digits in `n`` or `digits`
if (length(n) == 1) {
max_n <- n
n <- 1:max_n
} else {
max_n <- max(n)
}

max_digits <- max(floor(log10(max_n))+1, digits)
paste0(prefix, formatC(n, width = max_digits, flag = "0"))
}
10 changes: 6 additions & 4 deletions R/rnorm_multi.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Multiple Normally Distributed Vectors
#'
#' \code{rnorm_multi} makes multiple normally distributed vectors with specified relationships
#' \code{rnorm_multi()} makes multiple normally distributed vectors with specified relationships.
#'
#' @param n the number of samples required
#' @param vars the number of variables to return
Expand All @@ -12,10 +12,12 @@
#' @param as.matrix logical. If true, returns a matrix
#' @param cors (deprecated; use r)
#'
#' @return dataframe of vars vectors
#' @return a tbl of vars vectors
#'
#' @examples
#' rnorm_multi(100, 3, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C"))
#' rnorm_multi(100, 3, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C"))
#' rnorm_multi(100, 3, 0, 1, c(0.2, 0.4, 0.5), varnames=c("A", "B", "C"))
#' rnorm_multi(100, 3, 0, 1, c(1, 0.2, -0.5, 0.2, 1, 0.5, -0.5, 0.5, 1), varnames=c("A", "B", "C"))
#'
#' @export

rnorm_multi <- function(n, vars = 3, mu = 0, sd = 1, r = 0,
Expand Down
24 changes: 12 additions & 12 deletions R/select_num_grp.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,39 @@
#'
#' \code{select_num_grp} Select grouping and (optionally specified) numeric columns and group
#'
#' @param dat the existing dataframe
#' @param .data the existing tbl
#' @param between an optional list of column names to group by
#' @param cols an optional list of column names to return (default of NULL returns all numeric columns)
#'
#' @return tibble
#' @return a tbl
#' @examples
#' select_num_grp(iris, "Species")
#' @export

select_num_grp <- function(dat, between = c(), cols = NULL) {
select_num_grp <- function(.data, between = c(), cols = NULL) {
# error checking -----------------
if (is.matrix(dat)) {
dat = as.data.frame(dat)
} else if (!is.data.frame(dat)) {
stop("dat must be a data frame or matrix")
if (is.matrix(.data)) {
.data = as.data.frame(.data)
} else if (!is.data.frame(.data)) {
stop(".data must be a data frame or matrix")
}

# select only grouping and numeric columns -----------------
if (is.null(between)) {
# no grouping, so select all numeric columns
numdat <- dplyr::select_if(dat, is.numeric)
numdat <- dplyr::select_if(.data, is.numeric)
grpdat <- numdat
} else if (is.numeric(between) || is.character(between)) {
# get grouping column names if specified by index
if (is.numeric(between)) between <- names(dat)[between]
if (is.numeric(between)) between <- names(.data)[between]

# numeric columns, excluding grouping columns
numdat <- dat %>%
numdat <- .data %>%
dplyr::select(-tidyselect::one_of(between)) %>%
dplyr::select_if(is.numeric)

# get grouping columns, add remaining numeric columns, and group
grpdat <- dat %>%
grpdat <- .data %>%
dplyr::select(tidyselect::one_of(between)) %>%
dplyr::bind_cols(numdat) %>%
dplyr::group_by_at(dplyr::vars(tidyselect::one_of(between)))
Expand All @@ -44,7 +44,7 @@ select_num_grp <- function(dat, between = c(), cols = NULL) {

if (!is.null(cols)) {
# return only grouping and cols
if (is.numeric(cols)) cols <- names(dat)[cols]
if (is.numeric(cols)) cols <- names(.data)[cols]

grpdat <- grpdat %>%
dplyr::select(tidyselect::one_of(c(between, cols)))
Expand Down
Loading

0 comments on commit 74d8d42

Please sign in to comment.