Skip to content

Commit

Permalink
Catching errors for mismatches in alt_counts_df splits (#721)
Browse files Browse the repository at this point in the history
* error catching

* adding mix cases

* feat: we need this only in the error

* adding more explicit check

* news

* correction

* chore: adding ""

* adding more things and discovering a possible issue

* spaces fix

* Update R/tt_dotabulation.R

Co-authored-by: Joe Zhu <[email protected]>
Signed-off-by: Davide Garolini <[email protected]>

---------

Signed-off-by: Davide Garolini <[email protected]>
Signed-off-by: Davide Garolini <[email protected]>
Co-authored-by: Joe Zhu <[email protected]>
  • Loading branch information
Melkiades and shajoezhu authored Aug 29, 2023
1 parent 25600cb commit 7c23ddf
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 0 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* Cleaned up spelling in documentation ([#685](https://github.com/insightsengineering/rtables/issues/685))
* Added `qtable_layout` and fixed `qtable` labeling via `row_labels` ([#698](https://github.com/insightsengineering/rtables/issues/698))
* Added vignette on exploratory analysis with `qtable`.
* Error catching and test coverage for cases where `alt_counts_df` presents different splits from `df`.

## rtables 0.6.2
* Fixed major regressions for `page_by` machinery caused by migration to `formatters` 0.5.1 pagination framework.
Expand Down
34 changes: 34 additions & 0 deletions R/tt_dotabulation.R
Original file line number Diff line number Diff line change
Expand Up @@ -753,12 +753,46 @@ setMethod(".make_split_kids", "Split",
spl_context = spl_context)[["datasplit"]],
error = function(e) e)

# Removing NA rows - to explore why this happens at all in a split
# This would be a fix but it is done in post-processing instead of pre-proc -> xxx
# x alt_dfpart <- lapply(alt_dfpart, function(data) {
# x data[!apply(is.na(data), 1, all), ]
# x })

# Error localization
if (is(alt_dfpart, "error")) {
stop("Following error encountered in splitting alt_counts_df: ",
alt_dfpart$message,
call. = FALSE)
}
# Error if split does not have the same values in the alt_df (and order)
# The following breaks if there are different levels (do_split returns empty list)
# or if there are different number of the same levels. Added handling of NAs
# in the values of the factor when is all only NAs
is_all_na <- all(is.na(alt_df[[spl_payload(spl)]]))

if (!all(names(dataspl) %in% names(alt_dfpart)) ||
length(alt_dfpart) != length(dataspl) ||
is_all_na) {
alt_df_spl_vals <- unique(alt_df[[spl_payload(spl)]])
end_part <- ""

if (!all(alt_df_spl_vals %in% levels(alt_df_spl_vals))) {
end_part <- paste0(" and following levels: ",
paste_vec(levels(alt_df_spl_vals)))
}

if (is_all_na) {
end_part <- ". Found only NAs in alt_counts_df split"
}

stop("alt_counts_df split variable(s) [", spl_payload(spl),
"] (in split ", as.character(class(spl)),
") does not have the same factor levels of df.\ndf has c(", '"',
paste(names(dataspl), collapse = '", "'), '"', ") levels while alt_counts_df has ",
ifelse(length(alt_df_spl_vals) > 0, paste_vec(alt_df_spl_vals), ""),
" unique values", end_part)
}
} else {
alt_dfpart <- setNames(rep(list(NULL), length(dataspl)), names(dataspl))
}
Expand Down
5 changes: 5 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,8 @@ spl_context_to_disp_path <- function(ctx) {
ret <- "root"
ret
}

# Utility function to paste vector of values in a nice way
paste_vec <- function(vec) {
paste0('c("', paste(vec, collapse = '", "'), '")')
}
55 changes: 55 additions & 0 deletions tests/testthat/test-tab_afun_cfun.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,61 @@ test_that("Error localization for missing split variable when done in alt_count_
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM))
})

test_that("Error localization for missmatch split variable when done in alt_count_df", {
afun_tmp <- function(x, .alt_df_row, .spl_context,...) {
# Important check that order is aligned even if source levels are not
check_val <- unique(.alt_df_row$ARMCD)
# This is something mysterious happening in splits for which if the values are all
# NAs in the split column, the dataspl has the nrow of the data in NA rows. xxx ToFix
check_val <- check_val[!is.na(check_val)]
stopifnot(as.character(check_val) == .spl_context$value[2])
mean(x)
}
lyt_row <- basic_table() %>% split_rows_by("ARMCD") %>% analyze("BMRKR1", afun = afun_tmp)

# Mismatch in the number of splits (NA is 0)
DM_tmp <- DM %>% mutate("ARMCD" = NA_character_)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mismatch of levels
armcd_col <- factor(sample(c("arm A", "arm B", "arm C"), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mix mismatch of levels
armcd_col <- factor(sample(c("arm A", "ARM B", "ARM C"), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mismatch in the number of levels
armcd_col2 <- factor(sample(levels(ex_adsl$ARMCD)[c(1, 2)], nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col2)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Another order -> should work? yes, split is valid
levels(armcd_col) <- levels(ex_adsl$ARMCD)[c(1, 3, 2)]
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))

# Mix mismatch of levels but covering them all -> valid split
armcd_col <- factor(sample(c("arm A", levels(ex_adsl$ARMCD)), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))

# Values are all NA, but the levels are correct
DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD))
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD))
DM_tmp$ARMCD[seq_along(levels(ex_adsl$ARMCD))] <- levels(ex_adsl$ARMCD)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))
})

context("Content functions (cfun)")

test_that(".alt_df_row appears in cfun but not in afun.", {
Expand Down

0 comments on commit 7c23ddf

Please sign in to comment.