From 9c5e3e35db1c6e8cb8c5fa8be237a6db4e55fe69 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 08:38:28 -0700 Subject: [PATCH] in progress --- NAMESPACE | 2 + R/ard_categorical.survey.design.R | 137 ++++++++++------ R/ard_continuous.survey.design.R | 4 +- R/ard_dichotomous.survey.design.R | 146 ++++++++++++++++++ R/ard_missing.survey.design.R | 114 ++++++++++++++ man/ard_categorical.survey.design.Rd | 4 +- man/ard_continuous.survey.design.Rd | 2 +- man/ard_dichotomous.survey.design.Rd | 69 +++++++++ man/ard_missing.survey.design.Rd | 61 ++++++++ man/dot-check_dichotomous_value.Rd | 25 +++ man/dot-unique_and_sorted.Rd | 29 ++++ .../test-ard_categorical.survey.design.R | 3 + 12 files changed, 541 insertions(+), 55 deletions(-) create mode 100644 R/ard_dichotomous.survey.design.R create mode 100644 R/ard_missing.survey.design.R create mode 100644 man/ard_dichotomous.survey.design.Rd create mode 100644 man/ard_missing.survey.design.Rd create mode 100644 man/dot-check_dichotomous_value.Rd create mode 100644 man/dot-unique_and_sorted.Rd diff --git a/NAMESPACE b/NAMESPACE index 9c95ccf46..3675bcc74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ S3method(ard_attributes,survey.design) S3method(ard_categorical,survey.design) S3method(ard_continuous,survey.design) +S3method(ard_dichotomous,survey.design) +S3method(ard_missing,survey.design) S3method(ard_regression,default) S3method(ard_stats_anova,anova) S3method(ard_stats_anova,data.frame) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index d8f02cda9..108b6537c 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -12,7 +12,7 @@ #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr -#' columns to include in summaries. Default is `everything()`. +#' columns to include in summaries. #' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr #' results are calculated for **all combinations** of the column specified #' and the variables. A single column may be specified. @@ -43,7 +43,7 @@ #' #' ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived) ard_categorical.survey.design <- function(data, - variables = everything(), + variables, by = NULL, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), @@ -52,9 +52,12 @@ ard_categorical.survey.design <- function(data, "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), ...) { set_cli_abort_call() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + check_dots_empty() deff <- TRUE # we may update in the future to make this an argument for users # process arguments ---------------------------------------------------------- + check_not_missing(variables) cards::process_selectors( data = data$variables, variables = {{ variables }}, @@ -81,7 +84,7 @@ ard_categorical.survey.design <- function(data, x = statistic, predicate = \(x) all(x %in% accepted_svy_stats), error_msg = c("Error in the values of the {.arg statistic} argument.", - i = "Values must be in {.val {accepted_svy_stats}}" + i = "Values must be in {.val {accepted_svy_stats}}" ) ) denominator <- arg_match(denominator) @@ -136,6 +139,13 @@ ard_categorical.survey.design <- function(data, by = any_of(by), denominator = denominator ) |> + # all the survey levels are reported as character, so we do the same here. + dplyr::mutate( + across( + c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), + ~map(.x, as.character) + ) + ) |> dplyr::select(-c("stat_label", "fmt_fn", "warning", "error")) |> dplyr::mutate( stat_name = @@ -166,9 +176,10 @@ ard_categorical.survey.design <- function(data, context = "categorical", warning = list(NULL), error = list(NULL), - ) |> + ) |> cards::tidy_ard_column_order() %>% - {structure(., class = c("card", class(.)))} # styler: off + {structure(., class = c("card", class(.)))} |> # styler: off + cards::tidy_ard_row_order() } @@ -177,24 +188,52 @@ ard_categorical.survey.design <- function(data, lapply( variables, \(variable) { + # convert the variable to a factor if not already one or a lgl, so we get the correct rate stats from svymean + if (!inherits(data$variables[[variable]], c("factor", "logical"))) { + data$variables[[variable]] <- factor(data$variables[[variable]]) + } + + # there are issues with svymean() when a variable has only one level. adding a second as needed + variable_lvls <- .unique_values_sort(data$variables, variable) + if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off + if (length(variable_lvls) == 1L) { + data$variables[[variable]] <- + factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) + } + if (!is_empty(by) && length(by_lvls) == 1L) { + data$variables[[by]] <- + factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) + } + # each combination of denominator and whether there is a by variable is handled separately - case_switch( - # by variable and column percentages - !is_empty(by) && denominator == "column" ~ - .one_svytable_rates_by_column(data, variable, by, deff), - # by variable and row percentages - !is_empty(by) && denominator == "row" ~ - .one_svytable_rates_by_row(data, variable, by, deff), - # by variable and cell percentages - !is_empty(by) && denominator == "cell" ~ - .one_svytable_rates_by_cell(data, variable, by, deff), - # no by variable and column/cell percentages - denominator %in% c("column", "cell") ~ - .one_svytable_rates_no_by_column_and_cell(data, variable, deff), - # no by variable and row percentages - denominator == "row" ~ - .one_svytable_rates_no_by_row(data, variable, deff) - ) + result <- + case_switch( + # by variable and column percentages + !is_empty(by) && denominator == "column" ~ + .one_svytable_rates_by_column(data, variable, by, deff), + # by variable and row percentages + !is_empty(by) && denominator == "row" ~ + .one_svytable_rates_by_row(data, variable, by, deff), + # by variable and cell percentages + !is_empty(by) && denominator == "cell" ~ + .one_svytable_rates_by_cell(data, variable, by, deff), + # no by variable and column/cell percentages + denominator %in% c("column", "cell") ~ + .one_svytable_rates_no_by_column_and_cell(data, variable, deff), + # no by variable and row percentages + denominator == "row" ~ + .one_svytable_rates_no_by_row(data, variable, deff) + ) + + # if a level was added, remove the fake level + if (length(variable_lvls) == 1L) { + result <- result |> dplyr::filter(.data$variable_level %in% variable_lvls) + } + if (!is_empty(by) && length(by_lvls) == 1L) { + result <- result |> dplyr::filter(.data$group1_level %in% by_lvls) + } + + result } ) |> dplyr::bind_rows() @@ -262,9 +301,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(by) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(by) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |> @@ -295,9 +334,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(variable) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(variable) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |> @@ -339,27 +378,27 @@ ard_categorical.survey.design <- function(data, # add big N and p, then return data frame of results switch(denominator, - "column" = - df_counts |> - dplyr::mutate( - .by = any_of("group1_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "row" = - df_counts |> - dplyr::mutate( - .by = any_of("variable_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "cell" = - df_counts |> - dplyr::mutate( - .by = any_of(c("group1_level", "variable_level")), - N = sum(.data$n), - p = .data$n / .data$N - ) + "column" = + df_counts |> + dplyr::mutate( + .by = any_of("group1_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "row" = + df_counts |> + dplyr::mutate( + .by = any_of("variable_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "cell" = + df_counts |> + dplyr::mutate( + .by = any_of(c("group1_level", "variable_level")), + N = sum(.data$n), + p = .data$n / .data$N + ) ) } diff --git a/R/ard_continuous.survey.design.R b/R/ard_continuous.survey.design.R index e6b53380f..5e8d1a31f 100644 --- a/R/ard_continuous.survey.design.R +++ b/R/ard_continuous.survey.design.R @@ -5,7 +5,7 @@ #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr -#' columns to include in summaries. Default is `everything()`. +#' columns to include in summaries. #' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr #' results are calculated for **all combinations** of the columns specified, #' including unobserved combinations and unobserved factor levels. @@ -56,8 +56,6 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, check_pkg_installed(pkg = "survey", reference_pkg = "cardx") # check inputs --------------------------------------------------------------- - check_not_missing(data) - check_class(data, cls = "survey.design") check_not_missing(variables) # process inputs ------------------------------------------------------------- diff --git a/R/ard_dichotomous.survey.design.R b/R/ard_dichotomous.survey.design.R new file mode 100644 index 000000000..53cc8bd6c --- /dev/null +++ b/R/ard_dichotomous.survey.design.R @@ -0,0 +1,146 @@ +#' ARD Dichotomous Survey Statistics +#' +#' Compute Analysis Results Data (ARD) for dichotomous summary statistics. +#' +#' @inheritParams ard_categorical.survey.design +#' @param value (named `list`)\cr +#' named list of dichotomous values to tabulate. +#' Default is `cards::maximum_variable_value(data$variables)`, +#' which returns the largest/last value after a sort. +#' +#' @return an ARD data frame of class 'card' +#' @export +#' +#' @examples +#' survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |> +#' ard_dichotomous(by = vs, variables = c(cyl, am), value = list(cyl = 4)) +ard_dichotomous.survey.design <- function(data, + variables, + by = NULL, + value = cards::maximum_variable_value(data$variables[variables]), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect", + "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), + ...) { + set_cli_abort_call() + check_dots_empty() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + + # check inputs --------------------------------------------------------------- + check_not_missing(variables) + + # process inputs ------------------------------------------------------------- + cards::process_selectors(data$variables, variables = {{ variables }}) + cards::process_formula_selectors(data$variables[variables], value = value) + cards::fill_formula_selectors( + data$variables[variables], + value = formals(asNamespace("cardx")[["ard_dichotomous.survey.design"]])[["value"]] |> eval() + ) + .check_dichotomous_value(data$variables, value) + + # return empty tibble if no variables selected ------------------------------- + if (is_empty(variables)) { + return(dplyr::tibble()) + } + + # calculate summary statistics ----------------------------------------------- + ard_categorical( + data = data, + variables = all_of(variables), + by = {{ by }}, + statistic = statistic, + denominator = denominator, + fmt_fn = fmt_fn, + stat_label = stat_label + ) |> + dplyr::filter( + pmap( + list(.data$variable, .data$variable_level), + function(variable, variable_level) { + variable_level %in% .env$value[[variable]] + } + ) |> + unlist() + ) |> + dplyr::mutate(context = "dichotomous") +} + +#' Perform Value Checks +#' +#' Check the validity of the values passed in `ard_dichotomous(value)`. +#' +#' @param data (`data.frame`)\cr +#' a data frame +#' @param value (named `list`)\cr +#' a named list +#' +#' @return returns invisible if check is successful, throws an error message if not. +#' @keywords internal +#' +#' @examples +#' cardx:::.check_dichotomous_value(mtcars, list(cyl = 4)) +.check_dichotomous_value <- function(data, value) { + imap( + value, + function(value, column) { + accepted_values <- .unique_and_sorted(data[[column]]) + if (length(value) != 1L || !value %in% accepted_values) { + message <- "Error in argument {.arg value} for variable {.val {column}}." + cli::cli_abort( + if (length(value) != 1L) { + c(message, "i" = "The value must be one of {.val {accepted_values}}.") + } else { + c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.") + }, + call = get_cli_abort_call() + ) + } + } + ) |> + invisible() +} + +#' ARD-flavor of unique() +#' +#' Essentially a wrapper for `unique(x) |> sort()` with `NA` levels removed. +#' For factors, all levels are returned even if they are unobserved. +#' Similarly, logical vectors always return `c(TRUE, FALSE)`, even if +#' both levels are not observed. +#' +#' @param x (`any`)\cr +#' a vector +#' +#' @return a vector +#' @keywords internal +#' +#' @examples +#' cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters)) +#' +#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE)) +#' +#' cards:::.unique_and_sorted(c(5, 5:1)) +.unique_and_sorted <- function(x, useNA = c("no", "always")) { + # styler: off + useNA <- match.arg(useNA) + # if a factor return a factor that includes the same levels (including unobserved levels) + if (inherits(x, "factor")) { + return( + factor( + if (useNA == "no") levels(x) + else c(levels(x), NA_character_), + levels = levels(x) + ) + ) + } + if (inherits(x, "logical")) { + if (useNA == "no") return(c(TRUE, FALSE)) + else return(c(TRUE, FALSE, NA)) + } + + # otherwise, return a simple unique and sort of the vector + if (useNA == "no") return(unique(x) |> sort()) + else return(unique(x) |> sort() |> c(NA)) + # styler: on +} diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R new file mode 100644 index 000000000..b0a277632 --- /dev/null +++ b/R/ard_missing.survey.design.R @@ -0,0 +1,114 @@ +#' ARD Missing Survey Statistics +#' +#' Compute Analysis Results Data (ARD) for statistics related to data missingness for survey objects +#' +#' @inheritParams ard_categorical.survey.design +#' +#' @return an ARD data frame of class 'card' +#' @export +#' +#' @examplesIf cardx:::is_pkg_installed("survey", reference_pkg = "cardx") +#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) +#' +#' ard_missing(svy_titanic, variables = c(Class, Age), by = Survived) +ard_missing.survey.design <- function(data, + variables, + by = NULL, + statistic = + everything() ~ c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted"), + fmt_fn = NULL, + stat_label = + everything() ~ list("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted"), + ...) { + set_cli_abort_call() + check_dots_empty() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + + # process inputs ------------------------------------------------------------- + check_not_missing(variables) + cards::process_selectors( + data = data$variables, + variables = {{ variables }}, + by = {{ by }} + ) + + # convert all variables to T/F whether it's missing -------------------------- + data$variables <- data$variables |> + dplyr::mutate(across(all_of(variables), Negate(is.na))) + + cards::process_formula_selectors( + data$variables[variables], + statistic = statistic, + fmt_fn = fmt_fn, + stat_label = stat_label + ) + cards::fill_formula_selectors( + data$variables[variables], + statistic = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["statistic"]] |> eval() + ) + + stats_available <- c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted") + cards::check_list_elements( + x = statistic, + predicate = \(x) is.character(x) && all(x %in% stats_available), + error_msg = "Elements passed in the {.arg statistic} argument must be one or more of {.val {stats_available}}" + ) + + # calculate results ---------------------------------------------------------- + result <- + ard_categorical( + data = data, + variables = all_of(variables), + by = any_of(by), + statistic = everything() ~ c("n", "N", "p", "n_unweighted", "N_unweighted", "p_unweighted") + ) + + # rename the stats for missingness ------------------------------------------- + result <- result |> + dplyr::mutate( + stat_name = + dplyr::case_when( + .data$stat_name %in% "N" ~ "N_obs", + .data$stat_name %in% "n" & .data$variable_level %in% FALSE ~ "N_miss", + .data$stat_name %in% "n" & .data$variable_level %in% TRUE ~ "N_nonmiss", + .data$stat_name %in% "p" & .data$variable_level %in% FALSE ~ "p_miss", + .data$stat_name %in% "p" & .data$variable_level %in% TRUE ~ "p_nonmiss", + .data$stat_name %in% "N_unweighted" ~ "N_obs_unweighted", + .data$stat_name %in% "n_unweighted" & .data$variable_level %in% FALSE ~ "N_miss_unweighted", + .data$stat_name %in% "n_unweighted" & .data$variable_level %in% TRUE ~ "N_nonmiss_unweighted", + .data$stat_name %in% "p_unweighted" & .data$variable_level %in% FALSE ~ "p_miss_unweighted", + .data$stat_name %in% "p_unweighted" & .data$variable_level %in% TRUE ~ "p_nonmiss_unweighted" + ) + ) |> + dplyr::select(-cards::all_ard_variables("levels"), -"stat_label", -"fmt_fn") |> + dplyr::slice(1L, .by = "stat_name") + + # final processing of fmt_fn ------------------------------------------------- + result <- result |> + .process_nested_list_as_df( + arg = fmt_fn, + new_column = "fmt_fn" + ) |> + .default_svy_cat_fmt_fn() + + # merge in statistic labels -------------------------------------------------- + result <- result |> + .process_nested_list_as_df( + arg = stat_label, + new_column = "stat_label", + unlist = TRUE + ) |> + dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) + + # return final object -------------------------------------------------------- + result |> + dplyr::mutate(context = "missing") |> + cards::tidy_ard_column_order() %>% + {structure(., class = c("card", class(.)))} # styler: off +} diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index 51c65374e..a96a43e91 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -6,7 +6,7 @@ \usage{ \method{ard_categorical}{survey.design}( data, - variables = everything(), + variables, by = NULL, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), @@ -22,7 +22,7 @@ a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the column specified diff --git a/man/ard_continuous.survey.design.Rd b/man/ard_continuous.survey.design.Rd index 6ba18722a..690238cb1 100644 --- a/man/ard_continuous.survey.design.Rd +++ b/man/ard_continuous.survey.design.Rd @@ -19,7 +19,7 @@ a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the columns specified, diff --git a/man/ard_dichotomous.survey.design.Rd b/man/ard_dichotomous.survey.design.Rd new file mode 100644 index 000000000..862472639 --- /dev/null +++ b/man/ard_dichotomous.survey.design.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{ard_dichotomous.survey.design} +\alias{ard_dichotomous.survey.design} +\title{ARD Dichotomous Survey Statistics} +\usage{ +\method{ard_dichotomous}{survey.design}( + data, + variables, + by = NULL, + value = cards::maximum_variable_value(data$variables[variables]), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", + "N_unweighted", "p_unweighted"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = + "Design Effect", n = "Unweighted n", N = "Unweighted N", p = "Unweighted \%"), + ... +) +} +\arguments{ +\item{data}{(\code{survey.design})\cr +a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +results are calculated for \strong{all combinations} of the column specified +and the variables. A single column may be specified.} + +\item{value}{(named \code{list})\cr +named list of dichotomous values to tabulate. +Default is \code{cards::maximum_variable_value(data$variables)}, +which returns the largest/last value after a sort.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a character vector of +statistic names to include. See default value for options.} + +\item{denominator}{(\code{string})\cr +a string indicating the type proportions to calculate. Must be one of +\code{"column"} (the default), \code{"row"}, and \code{"cell"}.} + +\item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a named list of functions +(or the RHS of a formula), +e.g. \verb{list(mpg = list(mean = \\(x) round(x, digits = 2) |> as.character))}.} + +\item{stat_label}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, or a single formula where +the list element is either a named list or a list of formulas defining the +statistic labels, e.g. \code{everything() ~ list(mean = "Mean", sd = "SD")} or +\code{everything() ~ list(mean ~ "Mean", sd ~ "SD")}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +Compute Analysis Results Data (ARD) for dichotomous summary statistics. +} +\examples{ +survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |> + ard_dichotomous(by = vs, variables = c(cyl, am), value = list(cyl = 4)) +} diff --git a/man/ard_missing.survey.design.Rd b/man/ard_missing.survey.design.Rd new file mode 100644 index 000000000..616cbb229 --- /dev/null +++ b/man/ard_missing.survey.design.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_missing.survey.design.R +\name{ard_missing.survey.design} +\alias{ard_missing.survey.design} +\title{ARD Missing Survey Statistics} +\usage{ +\method{ard_missing}{survey.design}( + data, + variables, + by = NULL, + statistic = everything() ~ c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", "p_miss_unweighted", + "p_nonmiss_unweighted"), + fmt_fn = NULL, + stat_label = everything() ~ default_stat_labels(), + ... +) +} +\arguments{ +\item{data}{(\code{survey.design})\cr +a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +results are calculated for \strong{all combinations} of the column specified +and the variables. A single column may be specified.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a character vector of +statistic names to include. See default value for options.} + +\item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a named list of functions +(or the RHS of a formula), +e.g. \verb{list(mpg = list(mean = \\(x) round(x, digits = 2) |> as.character))}.} + +\item{stat_label}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, or a single formula where +the list element is either a named list or a list of formulas defining the +statistic labels, e.g. \code{everything() ~ list(mean = "Mean", sd = "SD")} or +\code{everything() ~ list(mean ~ "Mean", sd ~ "SD")}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +Compute Analysis Results Data (ARD) for statistics related to data missingness for survey objects +} +\examples{ +\dontshow{if (cardx:::is_pkg_installed("survey", reference_pkg = "cardx")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + +ard_missing(svy_titanic, variables = c(Class, Age), by = Survived) +\dontshow{\}) # examplesIf} +} diff --git a/man/dot-check_dichotomous_value.Rd b/man/dot-check_dichotomous_value.Rd new file mode 100644 index 000000000..b008619ec --- /dev/null +++ b/man/dot-check_dichotomous_value.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{.check_dichotomous_value} +\alias{.check_dichotomous_value} +\title{Perform Value Checks} +\usage{ +.check_dichotomous_value(data, value) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame} + +\item{value}{(named \code{list})\cr +a named list} +} +\value{ +returns invisible if check is successful, throws an error message if not. +} +\description{ +Check the validity of the values passed in \code{ard_dichotomous(value)}. +} +\examples{ +cardx:::.check_dichotomous_value(mtcars, list(cyl = 4)) +} +\keyword{internal} diff --git a/man/dot-unique_and_sorted.Rd b/man/dot-unique_and_sorted.Rd new file mode 100644 index 000000000..b030838b8 --- /dev/null +++ b/man/dot-unique_and_sorted.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{.unique_and_sorted} +\alias{.unique_and_sorted} +\title{ARD-flavor of unique()} +\usage{ +.unique_and_sorted(x, useNA = c("no", "always")) +} +\arguments{ +\item{x}{(\code{any})\cr +a vector} +} +\value{ +a vector +} +\description{ +Essentially a wrapper for \code{unique(x) |> sort()} with \code{NA} levels removed. +For factors, all levels are returned even if they are unobserved. +Similarly, logical vectors always return \code{c(TRUE, FALSE)}, even if +both levels are not observed. +} +\examples{ +cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters)) + +cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE)) + +cards:::.unique_and_sorted(c(5, 5:1)) +} +\keyword{internal} diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R index 8154fe309..a7dc25a49 100644 --- a/tests/testthat/test-ard_categorical.survey.design.R +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -5,6 +5,9 @@ # The function _should_ work if the underlying type is factor or logical # - Do we get results for unobserved factor levels in the `by` and `variable` variables? # - Do we get results for unobserved logical levels in the `by` and `variable` variables, e.g. if there are only TRUE, we should have FALSE rows too? +# - It turns out variables (both by and variables) that only have one level are problematic in some ways. +# I've coded around these issues, but we need thorough testing when either by or a variable has a single level. +# We need tests for when these variables are factor, logical, and other to ensure every case is handled properly. # - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. Then all the results should equal the unweighted summariess # dplyr::tibble(y = rep(FALSE, 15), x = rep(TRUE, 15)) |> # survey::svydesign(ids = ~1, data = _, weights = ~1) |>