diff --git a/NEWS.md b/NEWS.md index 166c564f9..00c7cdaca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,7 @@ * Cleaned up spelling in documentation ([#685](https://github.com/insightsengineering/rtables/issues/685)) * Added `qtable_layout` and fixed `qtable` labeling via `row_labels` ([#698](https://github.com/insightsengineering/rtables/issues/698)) * Added vignette on exploratory analysis with `qtable`. + * Error catching and test coverage for cases where `alt_counts_df` presents different splits from `df`. ## rtables 0.6.2 * Fixed major regressions for `page_by` machinery caused by migration to `formatters` 0.5.1 pagination framework. diff --git a/R/tt_dotabulation.R b/R/tt_dotabulation.R index 57f56a70a..2ab9dc79a 100644 --- a/R/tt_dotabulation.R +++ b/R/tt_dotabulation.R @@ -753,12 +753,46 @@ setMethod(".make_split_kids", "Split", spl_context = spl_context)[["datasplit"]], error = function(e) e) + # Removing NA rows - to explore why this happens at all in a split + # This would be a fix but it is done in post-processing instead of pre-proc -> xxx + # x alt_dfpart <- lapply(alt_dfpart, function(data) { + # x data[!apply(is.na(data), 1, all), ] + # x }) + # Error localization if (is(alt_dfpart, "error")) { stop("Following error encountered in splitting alt_counts_df: ", alt_dfpart$message, call. = FALSE) } + # Error if split does not have the same values in the alt_df (and order) + # The following breaks if there are different levels (do_split returns empty list) + # or if there are different number of the same levels. Added handling of NAs + # in the values of the factor when is all only NAs + is_all_na <- all(is.na(alt_df[[spl_payload(spl)]])) + + if (!all(names(dataspl) %in% names(alt_dfpart)) || + length(alt_dfpart) != length(dataspl) || + is_all_na) { + alt_df_spl_vals <- unique(alt_df[[spl_payload(spl)]]) + end_part <- "" + + if (!all(alt_df_spl_vals %in% levels(alt_df_spl_vals))) { + end_part <- paste0(" and following levels: ", + paste_vec(levels(alt_df_spl_vals))) + } + + if (is_all_na) { + end_part <- ". Found only NAs in alt_counts_df split" + } + + stop("alt_counts_df split variable(s) [", spl_payload(spl), + "] (in split ", as.character(class(spl)), + ") does not have the same factor levels of df.\ndf has c(", '"', + paste(names(dataspl), collapse = '", "'), '"', ") levels while alt_counts_df has ", + ifelse(length(alt_df_spl_vals) > 0, paste_vec(alt_df_spl_vals), ""), + " unique values", end_part) + } } else { alt_dfpart <- setNames(rep(list(NULL), length(dataspl)), names(dataspl)) } diff --git a/R/utils.R b/R/utils.R index fd56eeb30..70c4e87dc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -97,3 +97,8 @@ spl_context_to_disp_path <- function(ctx) { ret <- "root" ret } + +# Utility function to paste vector of values in a nice way +paste_vec <- function(vec) { + paste0('c("', paste(vec, collapse = '", "'), '")') +} diff --git a/tests/testthat/test-tab_afun_cfun.R b/tests/testthat/test-tab_afun_cfun.R index efa5bf9c0..021687823 100644 --- a/tests/testthat/test-tab_afun_cfun.R +++ b/tests/testthat/test-tab_afun_cfun.R @@ -185,6 +185,61 @@ test_that("Error localization for missing split variable when done in alt_count_ expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM)) }) +test_that("Error localization for missmatch split variable when done in alt_count_df", { + afun_tmp <- function(x, .alt_df_row, .spl_context,...) { + # Important check that order is aligned even if source levels are not + check_val <- unique(.alt_df_row$ARMCD) + # This is something mysterious happening in splits for which if the values are all + # NAs in the split column, the dataspl has the nrow of the data in NA rows. xxx ToFix + check_val <- check_val[!is.na(check_val)] + stopifnot(as.character(check_val) == .spl_context$value[2]) + mean(x) + } + lyt_row <- basic_table() %>% split_rows_by("ARMCD") %>% analyze("BMRKR1", afun = afun_tmp) + + # Mismatch in the number of splits (NA is 0) + DM_tmp <- DM %>% mutate("ARMCD" = NA_character_) + expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp), + regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *")) + + # Mismatch of levels + armcd_col <- factor(sample(c("arm A", "arm B", "arm C"), nrow(DM), replace = TRUE)) + DM_tmp <- DM %>% mutate("ARMCD" = armcd_col) + expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp), + regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *")) + + # Mix mismatch of levels + armcd_col <- factor(sample(c("arm A", "ARM B", "ARM C"), nrow(DM), replace = TRUE)) + DM_tmp <- DM %>% mutate("ARMCD" = armcd_col) + expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp), + regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *")) + + # Mismatch in the number of levels + armcd_col2 <- factor(sample(levels(ex_adsl$ARMCD)[c(1, 2)], nrow(DM), replace = TRUE)) + DM_tmp <- DM %>% mutate("ARMCD" = armcd_col2) + expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp), + regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *")) + + # Another order -> should work? yes, split is valid + levels(armcd_col) <- levels(ex_adsl$ARMCD)[c(1, 3, 2)] + DM_tmp <- DM %>% mutate("ARMCD" = armcd_col) + expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp)) + + # Mix mismatch of levels but covering them all -> valid split + armcd_col <- factor(sample(c("arm A", levels(ex_adsl$ARMCD)), nrow(DM), replace = TRUE)) + DM_tmp <- DM %>% mutate("ARMCD" = armcd_col) + expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp)) + + # Values are all NA, but the levels are correct + DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD)) + expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp), + regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *")) + + DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD)) + DM_tmp$ARMCD[seq_along(levels(ex_adsl$ARMCD))] <- levels(ex_adsl$ARMCD) + expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp)) +}) + context("Content functions (cfun)") test_that(".alt_df_row appears in cfun but not in afun.", {