Skip to content

Commit

Permalink
added function to restore types in ARDs
Browse files Browse the repository at this point in the history
  • Loading branch information
ddsjoberg committed Nov 1, 2024
1 parent 9c0f38c commit fd17eb7
Show file tree
Hide file tree
Showing 15 changed files with 269 additions and 46 deletions.
3 changes: 2 additions & 1 deletion R/ard_categorical.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' a named list, a list of formulas,
#' or a single formula where the list element is a named list of functions
#' (or the RHS of a formula),
#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) |> as.character))`.
#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) |> as.character()))`.
#' @param stat_label ([`formula-list-selector`][cards::syntax])\cr
#' a named list, a list of formulas, or a single formula where
#' the list element is either a named list or a list of formulas defining the
Expand Down Expand Up @@ -207,6 +207,7 @@ ard_categorical.survey.design <- function(data,

# return final object --------------------------------------------------------
cards |>
.restore_original_column_types(data = data$variables) |>
dplyr::mutate(
context = "categorical",
warning = list(NULL),
Expand Down
5 changes: 3 additions & 2 deletions R/ard_categorical_ci.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,13 @@ ard_categorical_ci.survey.design <- function(data,
fmt_fn = map(.data$stat, ~ case_switch(is.numeric(.x) ~ 2L, .default = as.character))
) |>
cards::as_card() |>
cards::tidy_ard_column_order()
cards::tidy_ard_column_order() |>
.restore_original_column_types(data = data$variables)

# if a value was passed for the variable, subset on those results
if (!is_empty(value)) {
df_full <- df_full |>
dplyr::filter(.data$variable_level %in% .env$value)
dplyr::filter(unlist(.data$variable_level) %in% .env$value)
}

df_full
Expand Down
113 changes: 111 additions & 2 deletions R/ard_continuous.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ ard_continuous.survey.design <- function(data, variables, by = NULL,
)
}
) |>
dplyr::bind_rows()

dplyr::bind_rows() |>
.restore_original_column_types(data = data$variables)

# add stat_labels ------------------------------------------------------------
df_stats <-
Expand Down Expand Up @@ -319,3 +319,112 @@ accepted_svy_stats <- function(expand_quantiles = TRUE) {
)
)
}


# some operations coerce the variable types to character.
# this function will convert the `_level` values to their original types
.restore_original_column_types <- function(ard, data) {
# identify grouping variable names with associated levels --------------------
by <- character()
for (v in names(dplyr::select(ard, cards::all_ard_groups("names")))) {
if (paste0(v, "_level") %in% names(ard)) by <- c(by, ard[[v]][1]) # styler: off
}
variables <- character()
if ("variable" %in% names(ard)) {
variables <- ard[["variable"]] |> unique()
}

# if there are no levels to correct, then return ard as it is
if (is_empty(variables) && is_empty(by)) return(ard) # styler: off

# add an ID for sorting
ard$...ard_id_for_sorting... <- seq_len(nrow(ard))

# nest the raw data with original types --------------------------------------
if (!is_empty(variables)) {
if (!"variable_level" %in% names(ard)) df_variable_orginal_types <- unique(ard["variable"]) # styler: off
else if (!all(variables %in% names(data))) { # for survfit summaries, the times/probs var is not in the data
df_variable_orginal_types <- unique(ard[c("variable", "variable_level")])
}
else {
df_variable_orginal_types <-
map(
variables,
~ cards::nest_for_ard(data, by = .x, include_data = FALSE) |>
stats::setNames(c("variable", "variable_level"))
) |>
dplyr::bind_rows()
}
}
if (!is_empty(by)) {
df_by_orginal_types <-
cards::nest_for_ard(data, by = by, include_data = FALSE)
}

# combine groups and variables together
if (!is_empty(variables) && !is_empty(by)) {
df_original_types <-
dplyr::cross_join(df_by_orginal_types, df_variable_orginal_types)
}
else if (!is_empty(variables)) {
df_original_types <- df_variable_orginal_types
}
else if (!is_empty(by)) {
df_original_types <- df_by_orginal_types
}

# unlisting the sorting according the character value
df_original_types <- df_original_types |>
dplyr::arrange(across(everything(), ~map(., as.character) |> unlist(.)))

ard_nested <- ard |>
tidyr::nest(..ard_data... = -c(cards::all_ard_groups(), cards::all_ard_variables())) |>
dplyr::arrange(across(c(cards::all_ard_groups(), cards::all_ard_variables()), unlist))

# if all columns match, then replace the coerced character cols with their original type/class
all_cols_equal <-
every(
names(df_original_types) |> setdiff("variable_level"),
~ all(
unlist(ard_nested[[.x]]) == as.character(unlist(df_original_types[[.x]])) |
(is.na(unlist(ard_nested[[.x]])) & is.na(unlist(df_original_types[[.x]])))
)
)
# the variable level needs to be handled separately because there can be mixed type and we can't unlist
if (isTRUE(all_cols_equal) && "variable_level" %in% names(df_original_types)) {
all_cols_equal <-
seq_len(nrow(df_original_types)) |>
map_lgl(
~identical(
as.character(df_original_types[["variable_level"]][[.x]]),
as.character(ard_nested[["variable_level"]][[.x]])
)
) |>
all()
}

if (isTRUE(all_cols_equal)) {
return(
dplyr::bind_cols(
df_original_types,
dplyr::select(ard_nested, -all_of(names(df_original_types))),
.name_repair = "minimal"
) |>
tidyr::unnest(cols = "..ard_data...") |>
dplyr::arrange(.data$...ard_id_for_sorting...) |>
dplyr::select(-"...ard_id_for_sorting...") |>
cards::as_card()
)
}

# I hope this message is never triggered!
cli::cli_inform(c(
"If you see this message, variable levels have been coerced to character, which could cause downstream issues.",
"*" = "Please post a reproducible example to {.url https://github.com/insightsengineering/cardx/issues/new}, so we can address in the next release.",
"i" = "You can create a minimal reproducible example with {.fun reprex::reprex}."
))

ard |>
dplyr::arrange(.data$...ard_id_for_sorting...) |>
dplyr::select(-"...ard_id_for_sorting...")
}
3 changes: 2 additions & 1 deletion R/ard_continuous_ci.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ ard_continuous_ci.survey.design <- function(data,
method = method,
df = df,
...
)
) |>
.restore_original_column_types(data = data$variables)
}

.calculate_ard_continuous_survey_ci <- function(FUN, data, variables, by, conf.level, ...) {
Expand Down
3 changes: 2 additions & 1 deletion R/ard_survival_survfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ ard_survival_survfit.data.frame <- function(x, y, variables,
package = "survival",
method.args = {{ method.args }}
) |>
ard_survival_survfit(times = times, probs = probs, type = type)
ard_survival_survfit(times = times, probs = probs, type = type) |>
.restore_original_column_types(data = x)
}

#' Process Survival Fit For Time Estimates
Expand Down
2 changes: 1 addition & 1 deletion man/ard_categorical.survey.design.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ard_dichotomous.survey.design.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ard_missing.survey.design.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 25 additions & 23 deletions tests/testthat/_snaps/ard_categorical_ci.survey.design.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
# ard_categorical_ci(data)

Code
dplyr::select(as.data.frame(ard_categorical_ci(dclus1, variables = c(both,
awards))), -warning, -error, -fmt_fn, -context)
dplyr::select(ard_categorical_ci(dclus1, variables = c(both, awards)), -warning,
-error, -fmt_fn, -context)
Message
{cards} data frame: 20 x 5
Output
variable variable_level stat_name stat_label stat
1 both No estimate estimate 0.273224
2 both No conf.low conf.low 0.2131745
3 both No conf.high conf.high 0.342819
4 both No method method logit
5 both No conf.level conf.level 0.95
6 both Yes estimate estimate 0.726776
7 both Yes conf.low conf.low 0.657181
8 both Yes conf.high conf.high 0.7868255
9 both Yes method method logit
10 both Yes conf.level conf.level 0.95
11 awards No estimate estimate 0.2896175
12 awards No conf.low conf.low 0.2241835
13 awards No conf.high conf.high 0.3651608
14 awards No method method logit
15 awards No conf.level conf.level 0.95
16 awards Yes estimate estimate 0.7103825
17 awards Yes conf.low conf.low 0.6348392
18 awards Yes conf.high conf.high 0.7758165
19 awards Yes method method logit
20 awards Yes conf.level conf.level 0.95
variable variable_level stat_name stat_label stat
1 both No estimate estimate 0.273
2 both No conf.low conf.low 0.213
3 both No conf.high conf.high 0.343
4 both No method method logit
5 both No conf.level conf.lev… 0.95
6 both Yes estimate estimate 0.727
7 both Yes conf.low conf.low 0.657
8 both Yes conf.high conf.high 0.787
9 both Yes method method logit
10 both Yes conf.level conf.lev… 0.95
11 awards No estimate estimate 0.29
12 awards No conf.low conf.low 0.224
13 awards No conf.high conf.high 0.365
14 awards No method method logit
15 awards No conf.level conf.lev… 0.95
16 awards Yes estimate estimate 0.71
17 awards Yes conf.low conf.low 0.635
18 awards Yes conf.high conf.high 0.776
19 awards Yes method method logit
20 awards Yes conf.level conf.lev… 0.95

2 changes: 1 addition & 1 deletion tests/testthat/_snaps/ard_survival_survfit.md
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@
# ard_survival_survfit.data.frame() works as expected

Code
res_quo <- print(dplyr::mutate(ard_survival_survfit.data.frame(x = mtcars, y = "survival::Surv(mpg, am)",
res_quo <- print(dplyr::mutate(ard_survival_survfit(x = mtcars, y = "survival::Surv(mpg, am)",
variables = "vs", times = 20, method.args = list(start.time = 0, id = cyl)),
stat = lapply(stat, function(x) ifelse(is.numeric(x), cards::round5(x, 3), x))),
n = Inf)
Expand Down
44 changes: 44 additions & 0 deletions tests/testthat/test-ard_categorical.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -1324,3 +1324,47 @@ test_that("ard_categorical follows ard structure", {
cards::check_ard_structure(method = FALSE)
)
})

test_that("ard_categorical.survey.design() original types are retained", {
svy_titanic <-
survey::svydesign(
~1,
data = as.data.frame(Titanic) |> dplyr::mutate(Class.dbl = as.numeric(Class),
Class.int = as.integer(Class)),
weights = ~Freq
)

# factors and integer check
expect_silent(
ard <-
ard_categorical(svy_titanic, variables = c(Class, Age, Class.int, Class.dbl), by = Survived)
)
expect_equal(
unlist(ard$group1_level) |> levels(),
levels(as.data.frame(Titanic)$Survived)
)
expect_true(
dplyr::filter(ard, variable %in% "Class") |>
dplyr::pull("variable_level") |>
getElement(1L) |>
is.factor()
)
expect_true(
dplyr::filter(ard, variable %in% "Age") |>
dplyr::pull("variable_level") |>
getElement(1L) |>
is.factor()
)
expect_true(
dplyr::filter(ard, variable %in% "Class.int") |>
dplyr::pull("variable_level") |>
getElement(1L) |>
is.integer()
)
expect_true(
dplyr::filter(ard, variable %in% "Class.dbl") |>
dplyr::pull("variable_level") |>
getElement(1L) |>
is.numeric()
)
})
31 changes: 25 additions & 6 deletions tests/testthat/test-ard_categorical_ci.survey.design.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~f
test_that("ard_categorical_ci(data)", {
expect_snapshot(
ard_categorical_ci(dclus1, variables = c(both, awards)) |>
as.data.frame() |>
dplyr::select(-warning, -error, -fmt_fn, -context)
)
})
Expand All @@ -17,7 +16,7 @@ test_that("ard_categorical_ci(variables)", {
)

expect_equal(
cards::get_ard_statistics(ard, variable %in% "both", variable_level %in% "No")[c("estimate", "conf.low", "conf.high")],
cards::get_ard_statistics(ard, variable %in% "both", map(variable_level, as.character) %in% "No")[c("estimate", "conf.low", "conf.high")],
survey::svyciprop(~ I(both == "No"), design = dclus1, method = "logit", level = 0.95) %>%
{c(as.list(.), as.list(attr(., "ci")))} |> # styler: off
set_names(c("estimate", "conf.low", "conf.high"))
Expand Down Expand Up @@ -56,9 +55,9 @@ test_that("ard_categorical_ci(by)", {
expect_equal(
cards::get_ard_statistics(
ard,
group1_level %in% "No",
map(group1_level, as.character) %in% "No",
variable %in% "both",
variable_level %in% "No",
map(variable_level, as.character) %in% "No",
stat_name %in% c("estimate", "conf.low", "conf.high")
),
survey::svyciprop(~ I(both == "No"), design = dclus1 |> subset(sch.wide == "No")) %>%
Expand Down Expand Up @@ -92,7 +91,7 @@ test_that("ard_categorical_ci(conf.level)", {
)

expect_equal(
cards::get_ard_statistics(ard, variable %in% "both", variable_level == "No", stat_name %in% c("estimate", "conf.low", "conf.high")),
cards::get_ard_statistics(ard, variable %in% "both", map(variable_level, as.character) %in% "No", stat_name %in% c("estimate", "conf.low", "conf.high")),
survey::svyciprop(~ I(both == "No"), design = dclus1, level = 0.80, df = survey::degf(dclus1)) %>%
{c(as.list(.), as.list(attr(., "ci")))} |> # styler: off
set_names(c("estimate", "conf.low", "conf.high"))
Expand All @@ -105,11 +104,31 @@ test_that("ard_categorical_ci(method)", {
)

expect_equal(
cards::get_ard_statistics(ard, variable %in% "both", variable_level == "No", stat_name %in% c("estimate", "conf.low", "conf.high")),
cards::get_ard_statistics(ard, variable %in% "both", map(variable_level, as.character) %in% "No", stat_name %in% c("estimate", "conf.low", "conf.high")),
survey::svyciprop(~ I(both == "No"), design = dclus1, method = "likelihood", df = survey::degf(dclus1)) %>%
{c(as.list(.), as.list(attr(., "ci")))} |> # styler: off
set_names(c("estimate", "conf.low", "conf.high"))
)

# check type
expect_true(ard$variable_level |> unique() |> map_lgl(is.factor) |> all())
})

test_that("ard_categorical_ci(value)", {
data(api, package = "survey")
dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)

expect_equal(
ard_categorical_ci(dclus1, variables = sch.wide, value = sch.wide ~ "Yes", method = "xlogit"),
ard_categorical_ci(dclus1, variables = sch.wide, method = "xlogit") |>
dplyr::filter(unlist(variable_level) %in% "Yes")
)

expect_equal(
ard_categorical_ci(dclus1, variables = c(sch.wide, both), value = list(sch.wide ~ "Yes", both ~ "Yes"), method = "xlogit"),
ard_categorical_ci(dclus1, variables = c(sch.wide, both), method = "xlogit") |>
dplyr::filter(map(variable_level, as.character) %in% "Yes")
)
})

test_that("ard_categorical_ci.survey.design() follows ard structure", {
Expand Down
Loading

0 comments on commit fd17eb7

Please sign in to comment.