Skip to content

Commit

Permalink
in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
ddsjoberg committed Jun 14, 2024
1 parent bc7c3c3 commit 9c5e3e3
Show file tree
Hide file tree
Showing 12 changed files with 541 additions and 55 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
S3method(ard_attributes,survey.design)
S3method(ard_categorical,survey.design)
S3method(ard_continuous,survey.design)
S3method(ard_dichotomous,survey.design)
S3method(ard_missing,survey.design)
S3method(ard_regression,default)
S3method(ard_stats_anova,anova)
S3method(ard_stats_anova,data.frame)
Expand Down
137 changes: 88 additions & 49 deletions R/ard_categorical.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' @param data (`survey.design`)\cr
#' a design object often created with [`survey::svydesign()`].
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' columns to include in summaries. Default is `everything()`.
#' columns to include in summaries.
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' results are calculated for **all combinations** of the column specified
#' and the variables. A single column may be specified.
Expand Down Expand Up @@ -43,7 +43,7 @@
#'
#' ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived)
ard_categorical.survey.design <- function(data,
variables = everything(),
variables,
by = NULL,
statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"),
denominator = c("column", "row", "cell"),
Expand All @@ -52,9 +52,12 @@ ard_categorical.survey.design <- function(data,
"n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"),
...) {
set_cli_abort_call()
check_pkg_installed(pkg = "survey", reference_pkg = "cardx")
check_dots_empty()
deff <- TRUE # we may update in the future to make this an argument for users

# process arguments ----------------------------------------------------------
check_not_missing(variables)
cards::process_selectors(
data = data$variables,
variables = {{ variables }},
Expand All @@ -81,7 +84,7 @@ ard_categorical.survey.design <- function(data,
x = statistic,
predicate = \(x) all(x %in% accepted_svy_stats),
error_msg = c("Error in the values of the {.arg statistic} argument.",
i = "Values must be in {.val {accepted_svy_stats}}"
i = "Values must be in {.val {accepted_svy_stats}}"
)
)
denominator <- arg_match(denominator)
Expand Down Expand Up @@ -136,6 +139,13 @@ ard_categorical.survey.design <- function(data,
by = any_of(by),
denominator = denominator
) |>
# all the survey levels are reported as character, so we do the same here.
dplyr::mutate(
across(
c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")),
~map(.x, as.character)
)
) |>
dplyr::select(-c("stat_label", "fmt_fn", "warning", "error")) |>
dplyr::mutate(
stat_name =
Expand Down Expand Up @@ -166,9 +176,10 @@ ard_categorical.survey.design <- function(data,
context = "categorical",
warning = list(NULL),
error = list(NULL),
) |>
) |>
cards::tidy_ard_column_order() %>%
{structure(., class = c("card", class(.)))} # styler: off
{structure(., class = c("card", class(.)))} |> # styler: off
cards::tidy_ard_row_order()
}


Expand All @@ -177,24 +188,52 @@ ard_categorical.survey.design <- function(data,
lapply(
variables,
\(variable) {
# convert the variable to a factor if not already one or a lgl, so we get the correct rate stats from svymean
if (!inherits(data$variables[[variable]], c("factor", "logical"))) {
data$variables[[variable]] <- factor(data$variables[[variable]])
}

# there are issues with svymean() when a variable has only one level. adding a second as needed
variable_lvls <- .unique_values_sort(data$variables, variable)
if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off
if (length(variable_lvls) == 1L) {
data$variables[[variable]] <-
factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls)))
}
if (!is_empty(by) && length(by_lvls) == 1L) {
data$variables[[by]] <-
factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls)))
}

# each combination of denominator and whether there is a by variable is handled separately
case_switch(
# by variable and column percentages
!is_empty(by) && denominator == "column" ~
.one_svytable_rates_by_column(data, variable, by, deff),
# by variable and row percentages
!is_empty(by) && denominator == "row" ~
.one_svytable_rates_by_row(data, variable, by, deff),
# by variable and cell percentages
!is_empty(by) && denominator == "cell" ~
.one_svytable_rates_by_cell(data, variable, by, deff),
# no by variable and column/cell percentages
denominator %in% c("column", "cell") ~
.one_svytable_rates_no_by_column_and_cell(data, variable, deff),
# no by variable and row percentages
denominator == "row" ~
.one_svytable_rates_no_by_row(data, variable, deff)
)
result <-
case_switch(
# by variable and column percentages
!is_empty(by) && denominator == "column" ~
.one_svytable_rates_by_column(data, variable, by, deff),
# by variable and row percentages
!is_empty(by) && denominator == "row" ~
.one_svytable_rates_by_row(data, variable, by, deff),
# by variable and cell percentages
!is_empty(by) && denominator == "cell" ~
.one_svytable_rates_by_cell(data, variable, by, deff),
# no by variable and column/cell percentages
denominator %in% c("column", "cell") ~
.one_svytable_rates_no_by_column_and_cell(data, variable, deff),
# no by variable and row percentages
denominator == "row" ~
.one_svytable_rates_no_by_row(data, variable, deff)
)

# if a level was added, remove the fake level
if (length(variable_lvls) == 1L) {
result <- result |> dplyr::filter(.data$variable_level %in% variable_lvls)
}
if (!is_empty(by) && length(by_lvls) == 1L) {
result <- result |> dplyr::filter(.data$group1_level %in% by_lvls)
}

result
}
) |>
dplyr::bind_rows()
Expand Down Expand Up @@ -262,9 +301,9 @@ ard_categorical.survey.design <- function(data,
),
name =
str_remove_all(.data$name, "se\\.") %>%
str_remove_all("DEff\\.") %>%
str_remove_all(by) %>%
str_remove_all("`")
str_remove_all("DEff\\.") %>%
str_remove_all(by) %>%
str_remove_all("`")
) |>
tidyr::pivot_wider(names_from = "stat", values_from = "value") |>
set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |>
Expand Down Expand Up @@ -295,9 +334,9 @@ ard_categorical.survey.design <- function(data,
),
name =
str_remove_all(.data$name, "se\\.") %>%
str_remove_all("DEff\\.") %>%
str_remove_all(variable) %>%
str_remove_all("`")
str_remove_all("DEff\\.") %>%
str_remove_all(variable) %>%
str_remove_all("`")
) |>
tidyr::pivot_wider(names_from = "stat", values_from = "value") |>
set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |>
Expand Down Expand Up @@ -339,27 +378,27 @@ ard_categorical.survey.design <- function(data,

# add big N and p, then return data frame of results
switch(denominator,
"column" =
df_counts |>
dplyr::mutate(
.by = any_of("group1_level"),
N = sum(.data$n),
p = .data$n / .data$N
),
"row" =
df_counts |>
dplyr::mutate(
.by = any_of("variable_level"),
N = sum(.data$n),
p = .data$n / .data$N
),
"cell" =
df_counts |>
dplyr::mutate(
.by = any_of(c("group1_level", "variable_level")),
N = sum(.data$n),
p = .data$n / .data$N
)
"column" =
df_counts |>
dplyr::mutate(
.by = any_of("group1_level"),
N = sum(.data$n),
p = .data$n / .data$N
),
"row" =
df_counts |>
dplyr::mutate(
.by = any_of("variable_level"),
N = sum(.data$n),
p = .data$n / .data$N
),
"cell" =
df_counts |>
dplyr::mutate(
.by = any_of(c("group1_level", "variable_level")),
N = sum(.data$n),
p = .data$n / .data$N
)
)
}

Expand Down
4 changes: 1 addition & 3 deletions R/ard_continuous.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @param data (`survey.design`)\cr
#' a design object often created with [`survey::svydesign()`].
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' columns to include in summaries. Default is `everything()`.
#' columns to include in summaries.
#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' results are calculated for **all combinations** of the columns specified,
#' including unobserved combinations and unobserved factor levels.
Expand Down Expand Up @@ -56,8 +56,6 @@ ard_continuous.survey.design <- function(data, variables, by = NULL,
check_pkg_installed(pkg = "survey", reference_pkg = "cardx")

# check inputs ---------------------------------------------------------------
check_not_missing(data)
check_class(data, cls = "survey.design")
check_not_missing(variables)

# process inputs -------------------------------------------------------------
Expand Down
146 changes: 146 additions & 0 deletions R/ard_dichotomous.survey.design.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' ARD Dichotomous Survey Statistics
#'
#' Compute Analysis Results Data (ARD) for dichotomous summary statistics.
#'
#' @inheritParams ard_categorical.survey.design
#' @param value (named `list`)\cr
#' named list of dichotomous values to tabulate.
#' Default is `cards::maximum_variable_value(data$variables)`,
#' which returns the largest/last value after a sort.
#'
#' @return an ARD data frame of class 'card'
#' @export
#'
#' @examples
#' survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |>
#' ard_dichotomous(by = vs, variables = c(cyl, am), value = list(cyl = 4))
ard_dichotomous.survey.design <- function(data,
variables,
by = NULL,
value = cards::maximum_variable_value(data$variables[variables]),
statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"),
denominator = c("column", "row", "cell"),
fmt_fn = NULL,
stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect",
"n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"),
...) {
set_cli_abort_call()
check_dots_empty()
check_pkg_installed(pkg = "survey", reference_pkg = "cardx")

# check inputs ---------------------------------------------------------------
check_not_missing(variables)

# process inputs -------------------------------------------------------------
cards::process_selectors(data$variables, variables = {{ variables }})
cards::process_formula_selectors(data$variables[variables], value = value)
cards::fill_formula_selectors(
data$variables[variables],
value = formals(asNamespace("cardx")[["ard_dichotomous.survey.design"]])[["value"]] |> eval()
)
.check_dichotomous_value(data$variables, value)

# return empty tibble if no variables selected -------------------------------
if (is_empty(variables)) {
return(dplyr::tibble())
}

# calculate summary statistics -----------------------------------------------
ard_categorical(
data = data,
variables = all_of(variables),
by = {{ by }},
statistic = statistic,
denominator = denominator,
fmt_fn = fmt_fn,
stat_label = stat_label
) |>
dplyr::filter(
pmap(
list(.data$variable, .data$variable_level),
function(variable, variable_level) {
variable_level %in% .env$value[[variable]]
}
) |>
unlist()
) |>
dplyr::mutate(context = "dichotomous")
}

#' Perform Value Checks
#'
#' Check the validity of the values passed in `ard_dichotomous(value)`.
#'
#' @param data (`data.frame`)\cr
#' a data frame
#' @param value (named `list`)\cr
#' a named list
#'
#' @return returns invisible if check is successful, throws an error message if not.
#' @keywords internal
#'
#' @examples
#' cardx:::.check_dichotomous_value(mtcars, list(cyl = 4))
.check_dichotomous_value <- function(data, value) {
imap(
value,
function(value, column) {
accepted_values <- .unique_and_sorted(data[[column]])
if (length(value) != 1L || !value %in% accepted_values) {
message <- "Error in argument {.arg value} for variable {.val {column}}."
cli::cli_abort(
if (length(value) != 1L) {
c(message, "i" = "The value must be one of {.val {accepted_values}}.")
} else {
c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.")
},
call = get_cli_abort_call()
)
}
}
) |>
invisible()
}

#' ARD-flavor of unique()
#'
#' Essentially a wrapper for `unique(x) |> sort()` with `NA` levels removed.
#' For factors, all levels are returned even if they are unobserved.
#' Similarly, logical vectors always return `c(TRUE, FALSE)`, even if
#' both levels are not observed.
#'
#' @param x (`any`)\cr
#' a vector
#'
#' @return a vector
#' @keywords internal
#'
#' @examples
#' cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters))
#'
#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE))
#'
#' cards:::.unique_and_sorted(c(5, 5:1))
.unique_and_sorted <- function(x, useNA = c("no", "always")) {
# styler: off
useNA <- match.arg(useNA)
# if a factor return a factor that includes the same levels (including unobserved levels)
if (inherits(x, "factor")) {
return(
factor(
if (useNA == "no") levels(x)
else c(levels(x), NA_character_),
levels = levels(x)
)
)
}
if (inherits(x, "logical")) {
if (useNA == "no") return(c(TRUE, FALSE))
else return(c(TRUE, FALSE, NA))
}

# otherwise, return a simple unique and sort of the vector
if (useNA == "no") return(unique(x) |> sort())
else return(unique(x) |> sort() |> c(NA))
# styler: on
}
Loading

0 comments on commit 9c5e3e3

Please sign in to comment.