From b1947727ecf3d1c5b96fb18254f4dcaedd52632e Mon Sep 17 00:00:00 2001 From: sl-eeper Date: Mon, 11 Nov 2024 07:48:00 +0000 Subject: [PATCH 1/4] add countby option in forestcox, add pairwise option in tableone --- R/CreateTableOneJS.R | 213 ++++++--- R/forestcox.R | 1000 +++++++++++++++++++++++++----------------- 2 files changed, 735 insertions(+), 478 deletions(-) diff --git a/R/CreateTableOneJS.R b/R/CreateTableOneJS.R index e65aeb4..aabca0a 100644 --- a/R/CreateTableOneJS.R +++ b/R/CreateTableOneJS.R @@ -50,23 +50,23 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test testNormal = oneway.test, argsNormal = list(var.equal = F), testNonNormal = kruskal.test, argsNonNormal = list(NULL), showAllLevels = T, printToggle = F, quote = F, smd = F, Labels = F, exact = NULL, nonnormal = NULL, - catDigits = 1, contDigits = 2, pDigits = 3, labeldata = NULL, minMax = F, showpm = T, addOverall = F) { + catDigits = 1, contDigits = 2, pDigits = 3, labeldata = NULL, minMax = F, showpm = T, addOverall = F, pairwise = F) { setkey <- variable <- level <- . <- val_label <- NULL - + if (length(strata) != 1) { stop("Please select only 1 strata") } - + vars.ex <- names(which(sapply(vars, function(x) { !(class(data[[x]]) %in% c("integer", "numeric", "factor", "character")) }))) - + if (length(vars.ex) > 0) { warning("Variables other than numeric or factor types are excluded.") vars <- setdiff(vars, vars.ex) } - - + + res <- tableone::CreateTableOne( vars = vars, strata = strata, data = data, factorVars = factorVars, includeNA = includeNA, test = test, testApprox = testApprox, argsApprox = argsApprox, @@ -74,10 +74,10 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test testNormal = testNormal, argsNormal = argsNormal, testNonNormal = testNonNormal, argsNonNormal = argsNonNormal, smd = smd, addOverall = addOverall ) - + # factor_vars <- vars[sapply(vars, function(x){class(data[[x]]) %in% c("factor", "character")})] factor_vars <- res[["MetaData"]][["varFactors"]] - + if (Labels & !is.null(labeldata)) { labelled::var_label(data) <- sapply(names(data), function(v) { as.character(labeldata[get("variable") == v, "var_label"][1]) @@ -91,41 +91,41 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test res0$CatTable[[i]][[j]]$level <- labeldata[.(j, lvs), val_label] } } - + ptb1.res0 <- print(res0, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, - catDigits = catDigits, contDigits = contDigits, minMax = minMax + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, + catDigits = catDigits, contDigits = contDigits, minMax = minMax ) ptb1.rn <- rownames(ptb1.res0) ptb1.rn <- gsub("(mean (SD))", "", ptb1.rn, fixed = T) } - + vars.fisher <- sapply(factor_vars, function(x) { is(tryCatch(chisq.test(table(data[[strata]], data[[x]])), error = function(e) e, warning = function(w) w), "warning") }) vars.fisher <- factor_vars[unlist(vars.fisher)] - + if (is.null(exact) & length(vars.fisher) > 0) { exact <- vars.fisher } - + ptb1 <- print(res, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, smd = smd, varLabels = Labels, nonnormal = nonnormal, exact = exact, - catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, smd = smd, varLabels = Labels, nonnormal = nonnormal, exact = exact, + catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax ) if (showpm) { ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\(", "\u00B1 ", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\)", "", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) } - + rownames(ptb1) <- gsub("(mean (SD))", "", rownames(ptb1), fixed = T) if (Labels & !is.null(labeldata)) { rownames(ptb1) <- ptb1.rn if (showAllLevels == T) ptb1[, 1] <- ptb1.res0[, 1] } - + # cap.tb1 = paste("Table 1: Stratified by ", strata, sep="") - + if (Labels & !is.null(labeldata)) { colname.group_var <- unlist(labeldata[.(strata, names(res$CatTable)), val_label]) if (is.na(colname.group_var[1]) & addOverall) { @@ -140,7 +140,83 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test # ptb1[,1] = vals.tb1 # cap.tb1 = paste("Table 1: Stratified by ", labeldata[variable == strata, "var_label"][1], sep="") } - + + if (pairwise && length(unique(data[[strata]])) > 2) { + # 정규성 테스트 수행 + normality_results <- sapply(vars, function(x) { + is_continuous <- is.numeric(data[[x]]) || is.integer(data[[x]]) + if (is_continuous && nrow(data) <= 5000) { + shapiro_test_result <- tryCatch(stats::shapiro.test(data[[x]])$p.value, error = function(e) NA) + return(!is.na(shapiro_test_result) && shapiro_test_result >= 0.05) # p >= 0.05면 정규성 만족 + } else { + return(TRUE) # 큰 샘플이거나 정규성 검사가 불가할 때는 정규성 만족으로 처리 + } + }) + + pairwise_comparisons <- combn(colnames(ptb1)[(which(colnames(ptb1) == "level") + 1):(which(colnames(ptb1) == "p") - 1)], 2, simplify = FALSE) + pairwise_pvalues_list <- lapply(vars, function(x) { + sapply(pairwise_comparisons, function(pair) { + subset_data <- data[data[[strata]] %in% pair, ] + is_continuous <- is.numeric(data[[x]]) || is.integer(data[[x]]) + test_result <- if (is_continuous) { + if (normality_results[x]) { + tryCatch({ + test <- t.test(subset_data[[x]] ~ subset_data[[strata]], var.equal = FALSE) # Welch's t-test + list(p_value = test$p.value, test_used = "t-test") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } else { + tryCatch({ + test <- wilcox.test(subset_data[[x]] ~ subset_data[[strata]]) + list(p_value = test$p.value, test_used = "wilcox") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } + } else { + # 범주형 데이터에 대해 Chi-square 또는 Fisher's exact test 수행 + tryCatch({ + test <- chisq.test(table(subset_data[[strata]], subset_data[[x]])) + list(p_value = test$p.value, test_used = "chisq") + }, warning = function(w) { + test <- fisher.test(table(subset_data[[strata]], subset_data[[x]])) + list(p_value = test$p.value, test_used = "exact") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } + return(test_result) + }, simplify = FALSE) + }) + names(pairwise_pvalues_list) <- vars + + for (i in seq_along(pairwise_comparisons)) { + col_name <- paste0("p(", pairwise_comparisons[[i]][1], "vs", pairwise_comparisons[[i]][2], ")") + test_name <- paste0("test(", pairwise_comparisons[[i]][1], "vs", pairwise_comparisons[[i]][2], ")") + ptb1 <- cbind(ptb1, col_name = "", test_name = "") + colnames(ptb1)[ncol(ptb1) - 1] <- col_name + colnames(ptb1)[ncol(ptb1)] <- test_name + } + + for (i in seq_along(pairwise_comparisons)) { + col_name <- paste0("p(", pairwise_comparisons[[i]][1], "vs", pairwise_comparisons[[i]][2], ")") + test_name <- paste0("test(", pairwise_comparisons[[i]][1], "vs", pairwise_comparisons[[i]][2], ")") + for (x in vars) { + p_value <- pairwise_pvalues_list[[x]][[i]]$p_value + test_used <- pairwise_pvalues_list[[x]][[i]]$test_used + cleaned_row_names <- gsub("\\s+|\\(\\%\\)", "", rownames(ptb1)) + cleaned_var_name <- gsub("\\s+|\\(\\%\\)", "", x) + first_row <- which(cleaned_row_names == cleaned_var_name)[1] + p_value <- ifelse(p_value < 0.001, "<0.001", as.character(round(p_value, 2))) + ptb1[first_row, col_name] <- p_value + ptb1[first_row, test_name] <- test_used + } + } + } + cols_to_remove <- grep("^test\\(", colnames(ptb1)) + ptb1 <- ptb1[, -cols_to_remove] + sig <- ifelse(ptb1[, "p"] == "<0.001", "0", ptb1[, "p"]) sig <- as.numeric(as.vector(sig)) sig <- ifelse(sig <= 0.05, "**", "") @@ -204,14 +280,14 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa testNonNormal = kruskal.test, argsNonNormal = list(NULL), showAllLevels = T, printToggle = F, quote = F, smd = F, Labels = F, exact = NULL, nonnormal = NULL, catDigits = 1, contDigits = 2, pDigits = 3, labeldata = NULL, psub = T, minMax = F, showpm = T, - addOverall = F, normalityTest = F) { + addOverall = F, normalityTest = F, pairwise = F) { . <- level <- variable <- val_label <- V1 <- V2 <- NULL # if (Labels & !is.null(labeldata)){ # var_label(data) = sapply(names(data), function(v){as.character(labeldata[get("variable") == v, "var_label"][1])}, simplify = F) # vals.tb1 = c(NA, unlist(sapply(vars, function(v){labeldata[get("variable") == v, "val_label"]}))) # } data <- data - + # 모든 변수에 대해 shapiro test 수행 if (normalityTest == T) { if (nrow(data) > 5000) { @@ -226,7 +302,7 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa })] } } - + if (is.null(strata)) { if (Labels & !is.null(labeldata)) { labelled::var_label(data) <- sapply(names(data), function(v) { @@ -234,7 +310,7 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa }, simplify = F) data.table::setkey(labeldata, variable, level) } - + res <- tableone::CreateTableOne( vars = vars, data = data, factorVars = factorVars, includeNA = includeNA, test = test, testApprox = testApprox, argsApprox = argsApprox, @@ -242,9 +318,9 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa testNormal = testNormal, argsNormal = argsNormal, testNonNormal = testNonNormal, argsNonNormal = argsNonNormal, smd = smd ) - + factor_vars <- res[["MetaData"]][["varFactors"]] - + if (Labels & !is.null(labeldata)) { for (i in seq_along(res$CatTable)) { for (j in factor_vars) { @@ -254,17 +330,17 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa } # vals.tb1 <- c(NA, unlist(sapply(vars, function(v){labeldata[get("variable") == v, "val_label"]}))) } - + ptb1 <- print(res, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, smd = smd, varLabels = Labels, nonnormal = nonnormal, - catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, smd = smd, varLabels = Labels, nonnormal = nonnormal, + catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax ) - + if (showpm) { ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\(", "\u00B1 ", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\)", "", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) } - + rownames(ptb1) <- gsub("(mean (SD))", "", rownames(ptb1), fixed = T) cap.tb1 <- "Total" # if (Labels & !is.null(labeldata)){ @@ -279,12 +355,11 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa testNormal = testNormal, argsNormal = argsNormal, testNonNormal = testNonNormal, argsNonNormal = argsNonNormal, smd = smd, showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, Labels = Labels, nonnormal = nonnormal, exact = exact, - catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, labeldata = labeldata, minMax = minMax, showpm = showpm, addOverall = addOverall + catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, labeldata = labeldata, minMax = minMax, showpm = showpm, addOverall = addOverall, pairwise = pairwise ) - - + cap.tb1 <- paste("Stratified by ", strata, sep = "") - + if (Labels & !is.null(labeldata)) { cap.tb1 <- paste("Stratified by ", labeldata[get("variable") == strata, "var_label"][1], sep = "") # ptb1[,1] = vals.tb1 @@ -294,15 +369,15 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa # data.strata <- lapply(levels(data[[strata]]), function(x){data[data[[strata]] == x, ]}) data.strata <- split(data, data[[strata]]) ptb1.list <- lapply(data.strata, CreateTableOne2, - vars = vars, strata = strata2, factorVars = factorVars, includeNA = includeNA, test = test, - testApprox = testApprox, argsApprox = argsApprox, - testExact = testExact, argsExact = argsExact, - testNormal = testNormal, argsNormal = argsNormal, - testNonNormal = testNonNormal, argsNonNormal = argsNonNormal, smd = smd, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, Labels = F, nonnormal = nonnormal, exact = exact, - catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax, showpm = T, addOverall = F + vars = vars, strata = strata2, factorVars = factorVars, includeNA = includeNA, test = test, + testApprox = testApprox, argsApprox = argsApprox, + testExact = testExact, argsExact = argsExact, + testNormal = testNormal, argsNormal = argsNormal, + testNonNormal = testNonNormal, argsNonNormal = argsNonNormal, smd = smd, + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, Labels = F, nonnormal = nonnormal, exact = exact, + catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax, showpm = T, addOverall = F ) - + if (showAllLevels == T) { ptb1.cbind <- Reduce(cbind, c(list(ptb1.list[[1]]), lapply(2:length(ptb1.list), function(x) { ptb1.list[[x]][, -1] @@ -310,8 +385,8 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa } else { ptb1.cbind <- Reduce(cbind, ptb1.list) } - - + + # colnum.test = which(colnames(ptb1.cbind) == "test") # ptb1.2group = ptb1.cbind[, c(setdiff(1:ncol(ptb1.cbind), colnum.test), colnum.test[1])] cap.tb1 <- paste("Stratified by ", strata, "(", paste(levels(data[[strata]]), collapse = ", "), ") & ", strata2, sep = "") @@ -321,7 +396,7 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa }, simplify = F) # vals.tb1 <- c(NA, unlist(sapply(vars, function(v){labeldata[get("variable") == v, "val_label"]}))) data.table::setkey(labeldata, variable, level) - + res <- tableone::CreateTableOne(vars = vars, data = data.strata[[1]], factorVars = factorVars, includeNA = includeNA) factor_vars <- res[["MetaData"]][["varFactors"]] for (i in seq_along(res$CatTable)) { @@ -330,20 +405,20 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa res$CatTable[[i]][[j]]$level <- labeldata[.(j, lvs), val_label] } } - + ptb1.res <- print(res, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, - catDigits = catDigits, contDigits = contDigits, minMax = minMax + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, + catDigits = catDigits, contDigits = contDigits, minMax = minMax ) ptb1.rn <- rownames(ptb1.res) rownames(ptb1.cbind) <- gsub("(mean (SD))", "", ptb1.rn, fixed = T) if (showAllLevels == T) { ptb1.cbind[, 1] <- ptb1.res[, 1] } - + cap.tb1 <- paste("Stratified by ", labeldata[get("variable") == strata, "var_label"][1], "(", paste(unlist(labeldata[get("variable") == strata, "val_label"]), collapse = ", "), ") & ", labeldata[get("variable") == strata2, "var_label"][1], sep = "") } - + return(list(table = ptb1.cbind, caption = cap.tb1)) } else { res <- tableone::CreateTableOne( @@ -353,20 +428,20 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa testNormal = oneway.test, argsNormal = list(var.equal = F), testNonNormal = kruskal.test, argsNonNormal = list(NULL), addOverall = addOverall ) - + factor_vars <- res[["MetaData"]][["varFactors"]] # factor_vars <- vars[sapply(vars, function(x){class(data[[x]]) %in% c("factor", "character")})] var.strata <- paste(data[[strata2]], data[[strata]], sep = "_") - + vars.fisher <- sapply(factor_vars, function(x) { is(tryCatch(chisq.test(table(var.strata, data[[x]])), error = function(e) e, warning = function(w) w), "warning") }) vars.fisher <- factor_vars[unlist(vars.fisher)] - + if (is.null(exact) & length(vars.fisher) > 0) { exact <- vars.fisher } - + if (Labels & !is.null(labeldata)) { labelled::var_label(data) <- sapply(names(data), function(v) { as.character(labeldata[get("variable") == v, "var_label"][1]) @@ -379,28 +454,28 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa res0$CatTable[[i]][[j]]$level <- labeldata[.(j, lvs), val_label] } } - + ptb1.res0 <- print(res0, - showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, - catDigits = catDigits, contDigits = contDigits, minMax = minMax + showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, varLabels = Labels, nonnormal = nonnormal, + catDigits = catDigits, contDigits = contDigits, minMax = minMax ) ptb1.rn <- rownames(ptb1.res0) ptb1.rn <- gsub("(mean (SD))", "", ptb1.rn, fixed = T) - + # vals.tb1 <- c(NA, unlist(sapply(vars, function(v){labeldata[get("variable") == v, "val_label"]}))) } - + ptb1 <- print(res, - showAllLevels = showAllLevels, - printToggle = F, quote = F, smd = smd, varLabels = Labels, exact = exact, nonnormal = nonnormal, - catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax + showAllLevels = showAllLevels, + printToggle = F, quote = F, smd = smd, varLabels = Labels, exact = exact, nonnormal = nonnormal, + catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, minMax = minMax ) - + if (showpm) { ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\(", "\u00B1 ", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ] <- gsub("\\)", "", ptb1[grepl("\\(mean \\(SD\\)\\)", rownames(ptb1)), ]) } - + rownames(ptb1) <- gsub("(mean (SD))", "", rownames(ptb1), fixed = T) if (Labels & !is.null(labeldata)) { rownames(ptb1) <- ptb1.rn @@ -408,14 +483,14 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa ptb1[, 1] <- ptb1.res0[, 1] } } - + sig <- ifelse(ptb1[, "p"] == "<0.001", "0", ptb1[, "p"]) sig <- as.numeric(as.vector(sig)) sig <- ifelse(sig <= 0.05, "**", "") ptb1 <- cbind(ptb1, sig) cap.tb1 <- paste("Table 1: Stratified by ", strata, " and ", strata2, sep = "") - - + + # Column name if (Labels & !is.null(labeldata)) { val_combination <- data.table::CJ(labeldata[variable == strata, val_label], labeldata[variable == strata2, val_label], sorted = F) @@ -434,7 +509,7 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa colnames(ptb1)[1:length(colname.group_var)] <- colname.group_var } } - + # caption cap.tb1 <- paste("Stratified by ", labeldata[variable == strata, var_label][1], " and ", labeldata[variable == strata2, var_label][1], sep = "") # val_label diff --git a/R/forestcox.R b/R/forestcox.R index 0314852..5edbedf 100644 --- a/R/forestcox.R +++ b/R/forestcox.R @@ -52,450 +52,632 @@ #' @importFrom stats confint coefficients #' @importFrom utils tail - -TableSubgroupCox <- function(formula, var_subgroup = NULL, var_cov = NULL, data, time_eventrate = 3 * 365, decimal.hr = 2, decimal.percent = 1, decimal.pvalue = 3, cluster = NULL, strata = NULL, weights = NULL) { - . <- NULL - - ### var_subgroup이 categorical variable이 아닌 경우 중단 ### - if (any(class(data) == "survey.design" & !is.null(var_subgroup))) { - if (is.numeric(data$variables[[var_subgroup]])) stop("var_subgroup must categorical.") - # if (length(levels(data$variables[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.") - } else if (any(class(data) == "data.frame" & !is.null(var_subgroup))) { - if (is.numeric(data[[var_subgroup]])) stop("var_subgroup must categorical.") - # if (length(levels(data[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.") +count_event_by <- function(formula,data, count_by_var = NULL, var_subgroup = NULL, decimal.percent = 1) { + if (inherits(data, "survey.design")) { + data<- data$variables + } else { + data <- data } - - ## functions with error - possible_table <- purrr::possibly(table, NA) - possible_prop.table <- purrr::possibly(function(x) { - prop.table(x, 1)[2, ] * 100 - }, NA) - possible_pv <- purrr::possibly(function(x) { - summary(x)[["coefficients"]][1, ] %>% tail(1) - }, NA) - possible_coxph <- purrr::possibly(survival::coxph, NA) - possible_svycoxph <- purrr::possibly(survey::svycoxph, NA) - possible_confint <- purrr::possibly(stats::confint, NA) - possible_modely <- purrr::possibly(function(x) { - purrr::map_dbl(x, .[["y"]], 1) - }, NA) - possible_rowone <- purrr::possibly(function(x) { - x[1, ] - }, NA) - - formula.km <- formula - var_cov <- setdiff(var_cov, c(as.character(formula[[3]]), var_subgroup)) - xlabel <- setdiff(as.character(formula)[[3]], "+")[1] - - ncoef <- ifelse(any(class(data) == "survey.design"), ifelse(length(levels(data$variables[[xlabel]])) <= 2, 1, length(levels(data$variables[[xlabel]])) - 1), - ifelse(length(levels(data[[xlabel]])) <= 2 || is.numeric(data[[xlabel]]), 1, length(levels(data[[xlabel]])) - 1) + + event_col <- as.character(formula[[2]][[3]]) + total_count <- nrow(data) + total_event_count <- sum(data[[event_col]] == 1, na.rm = TRUE) + total_event_rate <- paste0(total_event_count, "/", total_count, " (", round(total_event_count / total_count * 100, decimal.percent), "%)") + + if (!is.null(count_by_var) && !is.null(var_subgroup)) { + # count_by_var와 var_subgroup이 모두 있을 때 + counts <- data %>% + tidyr::drop_na(!!sym(var_subgroup)) %>% + dplyr::group_by(!!sym(count_by_var), !!sym(var_subgroup)) %>% + dplyr::summarize(Count = n(), Event_Count = sum(!!sym(event_col) == 1, na.rm = TRUE), .groups = 'drop') %>% + dplyr::mutate(Event_Rate = paste0(Event_Count, "/", Count, " (", round(Event_Count / Count * 100, decimal.percent), "%)")) + + overall_counts <- data %>% + dplyr::group_by(!!sym(count_by_var)) %>% + dplyr::summarize(Count = n(), Event_Count = sum(!!sym(event_col) == 1, na.rm = TRUE), .groups = 'drop') %>% + dplyr::mutate(Event_Rate = paste0(Event_Count, "/", Count, " (", round(Event_Count / Count * 100, decimal.percent), "%)"), + !!sym(var_subgroup) := "Overall") + + counts <- counts %>% + dplyr::bind_rows(overall_counts) %>% + dplyr::arrange(!!sym(count_by_var), !!sym(var_subgroup)) + + } else if (is.null(count_by_var) && !is.null(var_subgroup)) { + # var_subgroup만 있을 때 + counts <- data %>% + tidyr::drop_na(!!sym(var_subgroup)) %>% + dplyr::group_by(!!sym(var_subgroup)) %>% + dplyr::summarize(Count = n(), Event_Count = sum(!!sym(event_col) == 1, na.rm = TRUE), .groups = 'drop') %>% + dplyr::mutate(Event_Rate = paste0(Event_Count, "/", Count, " (", round(Event_Count / Count * 100, decimal.percent), "%)")) + + } else if (!is.null(count_by_var) && is.null(var_subgroup)) { + # count_by_var만 있을 때 + counts <- data %>% + dplyr::group_by(!!sym(count_by_var)) %>% + dplyr::summarize(Count = n(), Event_Count = sum(!!sym(event_col) == 1, na.rm = TRUE), .groups = 'drop') %>% + dplyr::mutate(Event_Rate = paste0(Event_Count, "/", Count, " (", round(Event_Count / Count * 100, decimal.percent), "%)")) + } else { + # count_by_var와 var_subgroup이 NULL일 때는 전체 데이터만 Total로 계산 + counts <- tibble( + Total = "Total", + Count = total_count, + Event_Count = total_event_count, + Event_Rate = total_event_rate + ) + return(counts) + } + # Total 행을 추가 + total_row <- tibble( + Count = total_count, + Event_Count = total_event_count, + Event_Rate = total_event_rate ) + + counts <- bind_rows(counts, total_row) + + return(counts) +} - if (is.null(var_subgroup)) { - ### subgroup 지정 안 한 경우 ### - # 공변량 있는 경우 formula 변경 - if (!is.null(var_cov)) { - formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+"))) - } - - # Strata !is.null인 경우 formula 변경 - if (!is.null(strata)) { - formula <- as.formula(paste0(deparse(formula), " + ", paste0("strata(", strata, ")"))) +TableSubgroupCox <- function(formula, var_subgroup = NULL, var_cov = NULL, data, time_eventrate = 3 * 365, decimal.hr = 2, decimal.percent = 1, decimal.pvalue = 3, cluster = NULL, strata = NULL, weights = NULL, event = FALSE, count_by= NULL) { + . <- NULL + if(is.null(count_by)&&!(event)){ + ### var_subgroup이 categorical variable이 아닌 경우 중단 ### + if (any(class(data) == "survey.design" & !is.null(var_subgroup))) { + if (is.numeric(data$variables[[var_subgroup]])) stop("var_subgroup must categorical.") + # if (length(levels(data$variables[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.") + } else if (any(class(data) == "data.frame" & !is.null(var_subgroup))) { + if (is.numeric(data[[var_subgroup]])) stop("var_subgroup must categorical.") + # if (length(levels(data[[as.character(formula[[3]])]])) != 2) stop("Independent variable must have 2 levels.") } - - if (any(class(data) == "survey.design")) { - ### survey data인 경우 ### - model <- survey::svycoxph(formula, design = data, x = T) - # if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") - - # KM 구하기(categorical인 경우) - if (is.numeric(data$variables[[xlabel]])) { - prop <- NULL - } else { - res.kap <- survey::svykm(formula.km, design = data) - prop <- round(100 * sapply(res.kap, function(x) { - 1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))] - }), decimal.percent) - names(prop) <- paste0(xlabel, "=", model$xlevels[[1]]) - } - } else { - ### survey data가 아닌 경우 ### - weights <- if (!is.null(weights)) { - data[[weights]] - } else { - NULL + + ## functions with error + possible_table <- purrr::possibly(table, NA) + possible_prop.table <- purrr::possibly(function(x) { + prop.table(x, 1)[2, ] * 100 + }, NA) + possible_pv <- purrr::possibly(function(x) { + summary(x)[["coefficients"]][1, ] %>% tail(1) + }, NA) + possible_coxph <- purrr::possibly(survival::coxph, NA) + possible_svycoxph <- purrr::possibly(survey::svycoxph, NA) + possible_confint <- purrr::possibly(stats::confint, NA) + possible_modely <- purrr::possibly(function(x) { + purrr::map_dbl(x, .[["y"]], 1) + }, NA) + possible_rowone <- purrr::possibly(function(x) { + x[1, ] + }, NA) + + formula.km <- formula + var_cov <- setdiff(var_cov, c(as.character(formula[[3]]), var_subgroup)) + xlabel <- setdiff(as.character(formula)[[3]], "+")[1] + + ncoef <- ifelse(any(class(data) == "survey.design"), ifelse(length(levels(data$variables[[xlabel]])) <= 2, 1, length(levels(data$variables[[xlabel]])) - 1), + ifelse(length(levels(data[[xlabel]])) <= 2 || is.numeric(data[[xlabel]]), 1, length(levels(data[[xlabel]])) - 1) + ) + + if (is.null(var_subgroup)) { + ### subgroup 지정 안 한 경우 ### + # 공변량 있는 경우 formula 변경 + if (!is.null(var_cov)) { + formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+"))) } - if (!is.null(cluster)) { - formula.1 <- as.formula( - paste0(deparse(formula), " + ", "cluster(", cluster, ")") - ) - cc <- substitute( - survival::coxph(formula.1, data = data, x = T, weights = .weights), - list(.weights = weights) - ) - model <- eval(cc) - } else { - cc <- substitute( - survival::coxph(formula, data = data, x = T, weights = .weights), - list(.weights = weights) - ) - model <- eval(cc) + + # Strata !is.null인 경우 formula 변경 + if (!is.null(strata)) { + formula <- as.formula(paste0(deparse(formula), " + ", paste0("strata(", strata, ")"))) } - # if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") - - # KM 구하기(categorical인 경우) - if (is.numeric(data[[xlabel]])) { - prop <- NULL + + if (any(class(data) == "survey.design")) { + ### survey data인 경우 ### + model <- survey::svycoxph(formula, design = data, x = T) + # if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") + + # KM 구하기(categorical인 경우) + if (is.numeric(data$variables[[xlabel]])) { + prop <- NULL + } else { + res.kap <- survey::svykm(formula.km, design = data) + prop <- round(100 * sapply(res.kap, function(x) { + 1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))] + }), decimal.percent) + names(prop) <- paste0(xlabel, "=", model$xlevels[[1]]) + } } else { - res.kap <- survival::survfit(formula.km, data = data) - res.kap.times <- summary(res.kap, times = time_eventrate, extend = T) - prop <- round(100 * (1 - res.kap.times[["surv"]]), decimal.percent) - names(prop) <- paste0(xlabel, "=", model$xlevels[[1]]) + ### survey data가 아닌 경우 ### + weights <- if (!is.null(weights)) { + data[[weights]] + } else { + NULL + } + if (!is.null(cluster)) { + formula.1 <- as.formula( + paste0(deparse(formula), " + ", "cluster(", cluster, ")") + ) + cc <- substitute( + survival::coxph(formula.1, data = data, x = T, weights = .weights), + list(.weights = weights) + ) + model <- eval(cc) + } else { + cc <- substitute( + survival::coxph(formula, data = data, x = T, weights = .weights), + list(.weights = weights) + ) + model <- eval(cc) + } + # if (!is.null(model$xlevels) & length(model$xlevels[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") + + # KM 구하기(categorical인 경우) + if (is.numeric(data[[xlabel]])) { + prop <- NULL + } else { + res.kap <- survival::survfit(formula.km, data = data) + res.kap.times <- summary(res.kap, times = time_eventrate, extend = T) + prop <- round(100 * (1 - res.kap.times[["surv"]]), decimal.percent) + names(prop) <- paste0(xlabel, "=", model$xlevels[[1]]) + } + # out.kap <- paste(res.kap.times[["n.event"]], " (", round(100 * (1 - res.kap.times[["surv"]]), decimal.percent), ")", sep = "") } - # out.kap <- paste(res.kap.times[["n.event"]], " (", round(100 * (1 - res.kap.times[["surv"]]), decimal.percent), ")", sep = "") - } - - # PE, CI, PV 구하기 - Point.Estimate <- round(exp(coef(model)), decimal.hr)[1:ncoef] - - # if (length(Point.Estimate) > 1){ - # stop("Formula must contain 1 independent variable only.") - # } - - CI <- round(exp(confint(model)[1:ncoef, ]), decimal.hr) - event <- purrr::map_dbl(model$y, 1) %>% tail(model$n) - # prop <- round(prop.table(table(event, model$x[, 1]), 2)[2, ] * 100, decimal.percent) - pv <- round(summary(model)$coefficients[1:ncoef, "Pr(>|z|)"], decimal.pvalue) - - # output 만들기 - if (ncoef < 2) { - out <- data.frame(Variable = "Overall", Count = model$n, Percent = 100, `Point Estimate` = Point.Estimate, Lower = CI[1], Upper = CI[2], check.names = F) %>% - mutate(`P value` = ifelse(pv >= 0.001, pv, "<0.001"), `P for interaction` = NA) - - if (!is.null(names(prop))) { + + # PE, CI, PV 구하기 + Point.Estimate <- round(exp(coef(model)), decimal.hr)[1:ncoef] + + # if (length(Point.Estimate) > 1){ + # stop("Formula must contain 1 independent variable only.") + # } + + CI <- round(exp(confint(model)[1:ncoef, ]), decimal.hr) + event <- purrr::map_dbl(model$y, 1) %>% tail(model$n) + # prop <- round(prop.table(table(event, model$x[, 1]), 2)[2, ] * 100, decimal.percent) + pv <- round(summary(model)$coefficients[1:ncoef, "Pr(>|z|)"], decimal.pvalue) + + # output 만들기 + if (ncoef < 2) { out <- data.frame(Variable = "Overall", Count = model$n, Percent = 100, `Point Estimate` = Point.Estimate, Lower = CI[1], Upper = CI[2], check.names = F) %>% - cbind(t(prop)) %>% mutate(`P value` = ifelse(pv >= 0.001, pv, "<0.001"), `P for interaction` = NA) - } - } else { - out <- data.frame( - Variable = c("Overall", rep("", length(Point.Estimate))), Count = c(model$n, rep("", length(Point.Estimate))), Percent = c(100, rep("", length(Point.Estimate))), - Levels = paste0(xlabel, "=", model$xlevels[[1]]), `Point Estimate` = c("Reference", Point.Estimate), Lower = c("", CI[, 1]), Upper = c("", CI[, 2]), check.names = F - ) %>% - mutate(`P value` = c("", ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) - - if (!is.null(names(prop))) { + + if (!is.null(names(prop))) { + out <- data.frame(Variable = "Overall", Count = model$n, Percent = 100, `Point Estimate` = Point.Estimate, Lower = CI[1], Upper = CI[2], check.names = F) %>% + cbind(t(prop)) %>% + mutate(`P value` = ifelse(pv >= 0.001, pv, "<0.001"), `P for interaction` = NA) + } + } else { out <- data.frame( Variable = c("Overall", rep("", length(Point.Estimate))), Count = c(model$n, rep("", length(Point.Estimate))), Percent = c(100, rep("", length(Point.Estimate))), - Levels = paste0(xlabel, "=", model$xlevels[[1]]), `Point Estimate` = c("Reference", Point.Estimate), Lower = c("", CI[, 1]), Upper = c("", CI[, 2]), KM = prop, check.names = F + Levels = paste0(xlabel, "=", model$xlevels[[1]]), `Point Estimate` = c("Reference", Point.Estimate), Lower = c("", CI[, 1]), Upper = c("", CI[, 2]), check.names = F ) %>% mutate(`P value` = c("", ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) - } - - rownames(out) <- NULL - } - - return(out) - } else if (length(var_subgroup) > 1 | any(grepl(var_subgroup, formula))) { - stop("Please input correct subgroup variable.") - } else { - ### subgroup 지정 한 경우 ### - # 공변량 있는 경우 formula 변경 - if (!is.null(var_cov)) { - formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+"))) - } - - # Strata !is.null인 경우 formula 변경 - if (!is.null(strata)) { - formula <- as.formula(paste0(deparse(formula), " + ", paste0("strata(", strata, ")"))) - } - - if (any(class(data) == "survey.design")) { - ### survey data인 경우 ### - data$variables[[var_subgroup]] <- factor(data$variables[[var_subgroup]]) - data$variables[[var_subgroup]] %>% - table() %>% - names() -> label_val - label_val %>% purrr::map(~ possible_svycoxph(formula, design = subset(data, get(var_subgroup) == .), x = TRUE)) -> model - xlev <- survey::svycoxph(formula, design = data)$xlevels - - # pv_int 구하기 - pv_int <- tryCatch( - { - pvs_int <- possible_svycoxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), design = data) %>% - summary() %>% - coefficients() - pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue) - pv_int - }, - error = function(e) { - return(NA) + + if (!is.null(names(prop))) { + out <- data.frame( + Variable = c("Overall", rep("", length(Point.Estimate))), Count = c(model$n, rep("", length(Point.Estimate))), Percent = c(100, rep("", length(Point.Estimate))), + Levels = paste0(xlabel, "=", model$xlevels[[1]]), `Point Estimate` = c("Reference", Point.Estimate), Lower = c("", CI[, 1]), Upper = c("", CI[, 2]), KM = prop, check.names = F + ) %>% + mutate(`P value` = c("", ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) } - ) - - ## interaction 여러개인 경우 pv_int 구하기 - model.int <- possible_svycoxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), design = data) - - if (any(is.na(model.int))) { - } else if (sum(grepl(":", names(coef(model.int)))) > 1) { - model.int$call$formula <- as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))) - pv_anova <- survey::regTermTest(model.int, as.formula(paste0("~", xlabel, ":", var_subgroup))) - pv_int <- round(pv_anova$p[1], decimal.pvalue) + + rownames(out) <- NULL } - - # KM 구하기(categorical인 경우만) - if (!is.numeric(data$variables[[xlabel]])) { - prop <- NULL - try( + + return(out) + } else if (length(var_subgroup) > 1 | any(grepl(var_subgroup, formula))) { + stop("Please input correct subgroup variable.") + } else { + ### subgroup 지정 한 경우 ### + # 공변량 있는 경우 formula 변경 + if (!is.null(var_cov)) { + formula <- as.formula(paste0(deparse(formula), " + ", paste(var_cov, collapse = "+"))) + } + + # Strata !is.null인 경우 formula 변경 + if (!is.null(strata)) { + formula <- as.formula(paste0(deparse(formula), " + ", paste0("strata(", strata, ")"))) + } + + if (any(class(data) == "survey.design")) { + ### survey data인 경우 ### + data$variables[[var_subgroup]] <- factor(data$variables[[var_subgroup]]) + data$variables[[var_subgroup]] %>% + table() %>% + names() -> label_val + label_val %>% purrr::map(~ possible_svycoxph(formula, design = subset(data, get(var_subgroup) == .), x = TRUE)) -> model + xlev <- survey::svycoxph(formula, design = data)$xlevels + + # pv_int 구하기 + pv_int <- tryCatch( { - res.kap <- purrr::map(label_val, ~ survey::svykm(formula.km, design = subset(data, get(var_subgroup) == .))) - mkz <- function(reskap) { - round(100 * sapply(reskap, function(x) { - 1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))] - }), decimal.percent) - } - prop <- purrr::map(res.kap, mkz) %>% - dplyr::bind_cols() %>% - t() - # prop <- purrr::map(res.kap, ~round(100 * sapply(., function(x){1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))]}), decimal.percent)) - colnames(prop) <- paste0(xlabel, "=", xlev[[1]]) + pvs_int <- possible_svycoxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), design = data) %>% + summary() %>% + coefficients() + pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue) + pv_int }, - silent = TRUE + error = function(e) { + return(NA) + } ) - } else { - prop <- NULL - } - } else { - ### survey data가 아닌 경우 ### - weights_option <- if (!is.null(weights)) TRUE else FALSE - data[[var_subgroup]] <- factor(data[[var_subgroup]]) - # Coxph 함수를 각 subgroup에 대해 적용시키기 위한 함수 - run_coxph <- function(subgroup_var, subgroup_value, data, formula, weights_option) { - subset_data <- data[data[[subgroup_var]] == subgroup_value, ] - - if (nrow(subset_data) == 0) { - return(NULL) + + ## interaction 여러개인 경우 pv_int 구하기 + model.int <- possible_svycoxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), design = data) + + if (any(is.na(model.int))) { + } else if (sum(grepl(":", names(coef(model.int)))) > 1) { + model.int$call$formula <- as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))) + pv_anova <- survey::regTermTest(model.int, as.formula(paste0("~", xlabel, ":", var_subgroup))) + pv_int <- round(pv_anova$p[1], decimal.pvalue) } - - subset_weights <- if (weights_option) { - as.numeric(as.character(subset_data[[weights]])) + + # KM 구하기(categorical인 경우만) + if (!is.numeric(data$variables[[xlabel]])) { + prop <- NULL + try( + { + res.kap <- purrr::map(label_val, ~ survey::svykm(formula.km, design = subset(data, get(var_subgroup) == .))) + mkz <- function(reskap) { + round(100 * sapply(reskap, function(x) { + 1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))] + }), decimal.percent) + } + prop <- purrr::map(res.kap, mkz) %>% + dplyr::bind_cols() %>% + t() + # prop <- purrr::map(res.kap, ~round(100 * sapply(., function(x){1 - x[["surv"]][which.min(abs(x[["time"]] - time_eventrate))]}), decimal.percent)) + colnames(prop) <- paste0(xlabel, "=", xlev[[1]]) + }, + silent = TRUE + ) } else { - NULL + prop <- NULL } - cc <- substitute( - survival::coxph(formula, data = subset_data, x = T, weights = .weights), - list(.weights = subset_weights) - ) - eval(cc) - } - - if (is.null(cluster)) { - model <- sapply(var_subgroup, function(var) { - if (is.factor(data[[var]])) { - unique_vals <- levels(data[[var]]) - } else { - unique_vals <- sort(setdiff(unique(data[[var]]), NA)) - } - lapply(unique_vals, function(value) { - result <- run_coxph(var, value, data, formula, weights_option) - }) - }) } else { - formula <- as.formula(paste0(deparse(formula), " + ", "cluster(", cluster, ")")) - - model <- sapply(var_subgroup, function(var) { - if (is.factor(data[[var]])) { - unique_vals <- levels(data[[var]]) + ### survey data가 아닌 경우 ### + weights_option <- if (!is.null(weights)) TRUE else FALSE + data[[var_subgroup]] <- factor(data[[var_subgroup]]) + # Coxph 함수를 각 subgroup에 대해 적용시키기 위한 함수 + run_coxph <- function(subgroup_var, subgroup_value, data, formula, weights_option) { + subset_data <- data[data[[subgroup_var]] == subgroup_value, ] + + if (nrow(subset_data) == 0) { + return(NULL) + } + + subset_weights <- if (weights_option) { + as.numeric(as.character(subset_data[[weights]])) } else { - unique_vals <- sort(setdiff(unique(data[[var]]), NA)) + NULL } - lapply(unique_vals, function(value) { - result <- run_coxph(var, value, data, formula, weights_option) + cc <- substitute( + survival::coxph(formula, data = subset_data, x = T, weights = .weights), + list(.weights = subset_weights) + ) + eval(cc) + } + + if (is.null(cluster)) { + model <- sapply(var_subgroup, function(var) { + if (is.factor(data[[var]])) { + unique_vals <- levels(data[[var]]) + } else { + unique_vals <- sort(setdiff(unique(data[[var]]), NA)) + } + lapply(unique_vals, function(value) { + result <- run_coxph(var, value, data, formula, weights_option) + }) }) - }) - } - weights <- if (!is.null(weights)) { - data[[weights]] - } else { - NULL - } - - data %>% - filter(!is.na(get(var_subgroup))) %>% - select(dplyr::all_of(var_subgroup)) %>% - table() %>% - names() -> label_val - xlev <- survival::coxph(formula, data = data)$xlevels - - - # strata만 공식에 추가하는 경우 P for interaction에서 가 나타나는 문제가 있어 수정 - if (is.null(cluster) & is.null(weights) & !is.null(strata)) { - model.int <- possible_coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data) - } else { - model.int <- tryCatch(eval(substitute(coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))), error = function(e) NA) - # if (!is.null(cluster)) { - # model.int <- eval(substitute(possible_coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))) - # } else { - # model.int <- tryCatch(eval(substitute(coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))), error = function(e) NA) - # } - } - - - # KM 구하기(categorical인 경우만) - if (!is.numeric(data[[xlabel]])) { - prop <- NULL - try({ - res.kap.times <- data %>% - filter(!is.na(get(var_subgroup))) %>% - split(.[[var_subgroup]]) %>% - purrr::map(~ survival::survfit(formula.km, data = .)) %>% - purrr::map(~ summary(., times = time_eventrate, extend = T)) - - prop <- matrix(nrow = length(res.kap.times), ncol = length(xlev[[1]])) - colnames(prop) <- paste0(xlabel, "=", xlev[[1]]) - rownames(prop) <- names(res.kap.times) - - sub_xlev <- data %>% - filter(!is.na(get(var_subgroup))) %>% - split(.[[var_subgroup]]) %>% - lapply(function(x) { - sort(setdiff(unique(x[[xlabel]]), NA)) + } else { + formula <- as.formula(paste0(deparse(formula), " + ", "cluster(", cluster, ")")) + + model <- sapply(var_subgroup, function(var) { + if (is.factor(data[[var]])) { + unique_vals <- levels(data[[var]]) + } else { + unique_vals <- sort(setdiff(unique(data[[var]]), NA)) + } + lapply(unique_vals, function(value) { + result <- run_coxph(var, value, data, formula, weights_option) }) - - for (i in rownames(prop)) { - if (length(sub_xlev[[i]]) == 1) { - # prop[i, paste0(xlabel, "=", sub_xlev[[i]])] <- res.kap.times[["0"]][["surv"]] - prop[i, ] <- res.kap.times[["0"]][["surv"]] - } else if (length(sub_xlev[[i]]) > 1) { - surv.df <- data.frame(res.kap.times[[i]][c("strata", "surv")]) - for (j in colnames(prop)) { - tryCatch(prop[i, j] <- surv.df[surv.df$strata == j, "surv"], - error = function(e) { - prop[i, j] <- NA - } - ) + }) + } + weights <- if (!is.null(weights)) { + data[[weights]] + } else { + NULL + } + + data %>% + filter(!is.na(get(var_subgroup))) %>% + select(dplyr::all_of(var_subgroup)) %>% + table() %>% + names() -> label_val + xlev <- survival::coxph(formula, data = data)$xlevels + + + # strata만 공식에 추가하는 경우 P for interaction에서 가 나타나는 문제가 있어 수정 + if (is.null(cluster) & is.null(weights) & !is.null(strata)) { + model.int <- possible_coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data) + } else { + model.int <- tryCatch(eval(substitute(coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))), error = function(e) NA) + # if (!is.null(cluster)) { + # model.int <- eval(substitute(possible_coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))) + # } else { + # model.int <- tryCatch(eval(substitute(coxph(as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))), data = data, weights = .weights), list(.weights = weights))), error = function(e) NA) + # } + } + + + # KM 구하기(categorical인 경우만) + if (!is.numeric(data[[xlabel]])) { + prop <- NULL + try({ + res.kap.times <- data %>% + filter(!is.na(get(var_subgroup))) %>% + split(.[[var_subgroup]]) %>% + purrr::map(~ survival::survfit(formula.km, data = .)) %>% + purrr::map(~ summary(., times = time_eventrate, extend = T)) + + prop <- matrix(nrow = length(res.kap.times), ncol = length(xlev[[1]])) + colnames(prop) <- paste0(xlabel, "=", xlev[[1]]) + rownames(prop) <- names(res.kap.times) + + sub_xlev <- data %>% + filter(!is.na(get(var_subgroup))) %>% + split(.[[var_subgroup]]) %>% + lapply(function(x) { + sort(setdiff(unique(x[[xlabel]]), NA)) + }) + + for (i in rownames(prop)) { + if (length(sub_xlev[[i]]) == 1) { + # prop[i, paste0(xlabel, "=", sub_xlev[[i]])] <- res.kap.times[["0"]][["surv"]] + prop[i, ] <- res.kap.times[["0"]][["surv"]] + } else if (length(sub_xlev[[i]]) > 1) { + surv.df <- data.frame(res.kap.times[[i]][c("strata", "surv")]) + for (j in colnames(prop)) { + tryCatch(prop[i, j] <- surv.df[surv.df$strata == j, "surv"], + error = function(e) { + prop[i, j] <- NA + } + ) + } } } + + prop <- round(100 * (1 - prop), decimal.percent) + }, silent = ) + } else { + prop <- NULL + } + + # pv_int 구하기 + if (any(is.na(model.int))) { + pv_int <- NA + } else if (sum(grepl(":", names(coef(model.int)))) == 1) { + pvs_int <- model.int %>% + summary() %>% + coefficients() + pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue) + # if (!is.null(xlev) & length(xlev[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") + } else { + model.int$call$formula <- as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))) + model.int$call$data <- as.name("data") + pv_anova <- tryCatch(anova(model.int), error = function(e) NA) + if (is.logical(pv_anova) & !is.null(cluster)) { + warning("Warning: Anova test is not available for cluster data. So Interaction P value is NA when 3 and more categorical subgroup variable.") } - - prop <- round(100 * (1 - prop), decimal.percent) - }, silent = ) + pv_int <- tryCatch(round(pv_anova[nrow(pv_anova), 4], decimal.pvalue), error = function(e) NA) + } + } + + # Count, PE, CI, PV 계산 + model %>% purrr::map_dbl("n", .default = NA) -> Count + + if (ncoef < 2) { + model %>% + purrr::map("coefficients", .default = NA) %>% + lapply(function(x) { + round(exp(x[1:ncoef]), decimal.hr) + }) %>% + unlist() -> Point.Estimate + + model %>% + lapply(function(x) { + tryCatch( + { + round(exp(stats::confint(x)[1, ]), decimal.hr) + }, + error = function(e) { + return(matrix(nrow = 1, ncol = 2, dimnames = list(paste0(xlabel, xlev[[1]][-1]), c("2.5 %", "97.5 %")))) + } + ) + }) %>% + Reduce(rbind, .) -> CI + + model %>% + purrr::map(possible_pv) %>% + purrr::map_dbl(~ round(., decimal.pvalue)) -> pv } else { - prop <- NULL + model %>% + purrr::map("coefficients", .default = NA) %>% + lapply(function(x) { + round(exp(x[1:ncoef]), decimal.hr) + }) -> Point.Estimate + + model %>% + purrr::map(possible_confint) %>% + lapply(function(x) { + tryCatch( + { + round(exp(x[1:ncoef, ]), decimal.hr) + }, + error = function(e) { + return(matrix(nrow = length(xlev[[1]]) - 1, ncol = 2, dimnames = list(paste0(xlabel, xlev[[1]][-1]), c("2.5 %", "97.5 %")))) + } + ) + }) -> CI + + model %>% + lapply(function(x) { + tryCatch( + { + round(summary(x)$coefficients[1:ncoef, 5], decimal.pvalue) + }, + error = function(e) { + return(rep(NA, length(xlev[[1]]) - 1)) + } + ) + }) -> pv } - - # pv_int 구하기 - if (any(is.na(model.int))) { - pv_int <- NA - } else if (sum(grepl(":", names(coef(model.int)))) == 1) { - pvs_int <- model.int %>% - summary() %>% - coefficients() - pv_int <- round(pvs_int[nrow(pvs_int), ncol(pvs_int)], decimal.pvalue) - # if (!is.null(xlev) & length(xlev[[1]]) != 2) stop("Categorical independent variable must have 2 levels.") + + # output 만들기 + if (ncoef < 2) { + out <- data.frame(Variable = paste(" ", label_val), Count = Count, Percent = round(Count / sum(Count) * 100, decimal.percent), `Point Estimate` = Point.Estimate, Lower = CI[, 1], Upper = CI[, 2], check.names = F, row.names = NULL) %>% + mutate(`P value` = unlist(ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) + + if (!is.null(prop)) { + out <- data.frame(Variable = paste(" ", label_val), Count = Count, Percent = round(Count / sum(Count) * 100, decimal.percent), `Point Estimate` = Point.Estimate, Lower = CI[, 1], Upper = CI[, 2], check.names = F, row.names = NULL) %>% + cbind(prop) %>% + mutate(`P value` = unlist(ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) + } + + rownames(out) <- NULL + + return(rbind(c(var_subgroup, rep(NA, ncol(out) - 2), ifelse(pv_int >= 0.001, pv_int, "<0.001")), out)) } else { - model.int$call$formula <- as.formula(gsub(xlabel, paste0(xlabel, "*", var_subgroup), deparse(formula))) - model.int$call$data <- as.name("data") - pv_anova <- tryCatch(anova(model.int), error = function(e) NA) - if (is.logical(pv_anova) & !is.null(cluster)) { - warning("Warning: Anova test is not available for cluster data. So Interaction P value is NA when 3 and more categorical subgroup variable.") + out <- data.frame( + Variable = unlist(lapply(label_val, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Count = unlist(lapply(Count, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Percent = unlist(lapply(round(Count / sum(Count) * 100, decimal.percent), function(x) c(x, rep("", length(xlev[[1]]) - 1)))), + Levels = rep(paste0(xlabel, "=", xlev[[1]]), length(label_val)), `Point Estimate` = unlist(lapply(Point.Estimate, function(x) c("Reference", x))), Lower = unlist(lapply(CI, function(x) c("", x[, 1]))), Upper = unlist(lapply(CI, function(x) c("", x[, 2]))), check.names = F + ) %>% + mutate(`P value` = unlist(lapply(pv, function(x) c("", ifelse(x >= 0.001, x, "<0.001")))), `P for interaction` = NA) + + if (!is.null(prop)) { + out <- data.frame( + Variable = unlist(lapply(label_val, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Count = unlist(lapply(Count, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Percent = unlist(lapply(round(Count / sum(Count) * 100, decimal.percent), function(x) c(x, rep("", length(xlev[[1]]) - 1)))), + Levels = rep(paste0(xlabel, "=", xlev[[1]]), length(label_val)), `Point Estimate` = unlist(lapply(Point.Estimate, function(x) c("Reference", x))), Lower = unlist(lapply(CI, function(x) c("", x[, 1]))), Upper = unlist(lapply(CI, function(x) c("", x[, 2]))), check.names = F + ) %>% + mutate(KM = as.vector(t(prop)), `P value` = unlist(lapply(pv, function(x) c("", ifelse(x >= 0.001, x, "<0.001")))), `P for interaction` = NA) } - pv_int <- tryCatch(round(pv_anova[nrow(pv_anova), 4], decimal.pvalue), error = function(e) NA) + + rownames(out) <- NULL + + return(rbind(c(var_subgroup, rep(NA, ncol(out) - 2), ifelse(pv_int >= 0.001, pv_int, "<0.001")), out)) + } } - - # Count, PE, CI, PV 계산 - model %>% purrr::map_dbl("n", .default = NA) -> Count - - if (ncoef < 2) { - model %>% - purrr::map("coefficients", .default = NA) %>% - lapply(function(x) { - round(exp(x[1:ncoef]), decimal.hr) - }) %>% - unlist() -> Point.Estimate - - model %>% - lapply(function(x) { - tryCatch( - { - round(exp(stats::confint(x)[1, ]), decimal.hr) - }, - error = function(e) { - return(matrix(nrow = 1, ncol = 2, dimnames = list(paste0(xlabel, xlev[[1]][-1]), c("2.5 %", "97.5 %")))) - } - ) - }) %>% - Reduce(rbind, .) -> CI - - model %>% - purrr::map(possible_pv) %>% - purrr::map_dbl(~ round(., decimal.pvalue)) -> pv + } + ##add event, count_by options + if((event)&& is.null(count_by)){ + original_output<-TableSubgroupCox(formula = formula, var_subgroup = var_subgroup, var_cov = var_cov, data = data , time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights, event = FALSE, count_by= count_by) + count_output<-count_event_by(formula = formula,data = data, count_by_var = count_by, var_subgroup = var_subgroup, decimal.percent = 1) + if(!is.null(var_subgroup)){ + for (i in 1:nrow(original_output)) { + clean_variable <- trimws(original_output$Variable[i]) + if (clean_variable != "" && clean_variable %in% count_output[[var_subgroup]]) { + match_row <- which(count_output[[var_subgroup]] == clean_variable) + if (length(match_row) > 0) { + original_output$Count[i] <- count_output$Event_Rate[match_row] + } + } + } + return(original_output) + } + else{ + original_output$Count[1]=count_output$Event_Rate[1] + return(original_output) + } + } + if((event)&& !is.null(count_by)){ + original_output<-TableSubgroupCox(formula = formula, var_subgroup = var_subgroup, var_cov = var_cov, data = data , time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights, event = FALSE, count_by= NULL) + count_output<-count_event_by(formula = formula,data = data, count_by_var = count_by, var_subgroup = var_subgroup, decimal.percent = 1) + if (inherits(data, "survey.design")) { + data<- data$variables } else { - model %>% - purrr::map("coefficients", .default = NA) %>% - lapply(function(x) { - round(exp(x[1:ncoef]), decimal.hr) - }) -> Point.Estimate - - model %>% - purrr::map(possible_confint) %>% - lapply(function(x) { - tryCatch( - { - round(exp(x[1:ncoef, ]), decimal.hr) - }, - error = function(e) { - return(matrix(nrow = length(xlev[[1]]) - 1, ncol = 2, dimnames = list(paste0(xlabel, xlev[[1]][-1]), c("2.5 %", "97.5 %")))) - } - ) - }) -> CI - - model %>% - lapply(function(x) { - tryCatch( - { - round(summary(x)$coefficients[1:ncoef, 5], decimal.pvalue) - }, - error = function(e) { - return(rep(NA, length(xlev[[1]]) - 1)) + data <- data + } + count_by_levels<-sort(unique(data[[count_by]]), decreasing = TRUE) + if(!is.null(var_subgroup)){ + subgroup_levels<-unique(data[[var_subgroup]]) + for(countlevel in count_by_levels){ + event_rate_col <- paste0("Count(",count_by, "=", countlevel,")") + original_output <- original_output %>% + tibble::add_column(!!event_rate_col := NA, .after = "Count") + for (level in subgroup_levels) { + value_to_insert <- count_output[count_output[[count_by]] == countlevel & count_output[[var_subgroup]] == level, "Event_Rate"] + value_to_insert <- value_to_insert[!is.na(value_to_insert)] + if (length(value_to_insert) > 0) { + if (!is.na(value_to_insert[1])) { + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == level] <- value_to_insert[1] + } else { + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == level] <- "" } - ) - }) -> pv + } else { + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == level] <- "" + } + } + } + for (i in 1:nrow(original_output)) { + count_output<-count_event_by(formula = formula,data = data, count_by_var = NULL, var_subgroup = var_subgroup, decimal.percent = 1) + clean_variable <- trimws(original_output$Variable[i]) + if (clean_variable != "" && clean_variable %in% count_output[[var_subgroup]]) { + match_row <- which(count_output[[var_subgroup]] == clean_variable) + if (length(match_row) > 0) { + original_output$Count[i] <- count_output$Event_Rate[match_row] + } + } + } + return(original_output) } - - # output 만들기 - if (ncoef < 2) { - out <- data.frame(Variable = paste(" ", label_val), Count = Count, Percent = round(Count / sum(Count) * 100, decimal.percent), `Point Estimate` = Point.Estimate, Lower = CI[, 1], Upper = CI[, 2], check.names = F, row.names = NULL) %>% - mutate(`P value` = unlist(ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) - - if (!is.null(prop)) { - out <- data.frame(Variable = paste(" ", label_val), Count = Count, Percent = round(Count / sum(Count) * 100, decimal.percent), `Point Estimate` = Point.Estimate, Lower = CI[, 1], Upper = CI[, 2], check.names = F, row.names = NULL) %>% - cbind(prop) %>% - mutate(`P value` = unlist(ifelse(pv >= 0.001, pv, "<0.001")), `P for interaction` = NA) + else{ + for(countlevel in count_by_levels){ + event_rate_col <- paste0("Count(",count_by, "=", countlevel,")") + original_output <- original_output %>% + tibble::add_column(!!event_rate_col := NA, .after = "Count") + value_to_insert <- count_output[count_output[[count_by]] == countlevel, "Event_Rate"] + value_to_insert <- value_to_insert[!is.na(value_to_insert)] + + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == "Overall"] <- value_to_insert[1] } - - rownames(out) <- NULL - - return(rbind(c(var_subgroup, rep(NA, ncol(out) - 2), ifelse(pv_int >= 0.001, pv_int, "<0.001")), out)) + count_output<-count_event_by(formula = formula,data = data, count_by_var = NULL, var_subgroup = var_subgroup, decimal.percent = 1) + original_output$Count[1]=count_output$Event_Rate[1] + return(original_output) + } + } + if(!(event)&& !is.null(count_by)){ + original_output<-TableSubgroupCox(formula = formula, var_subgroup = var_subgroup, var_cov = var_cov, data = data , time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights, event = event, count_by= NULL) + count_output<-count_event_by(formula = formula,data = data, count_by_var = count_by, var_subgroup = var_subgroup, decimal.percent = 1) + if (inherits(data, "survey.design")) { + data<- data$variables } else { - out <- data.frame( - Variable = unlist(lapply(label_val, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Count = unlist(lapply(Count, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Percent = unlist(lapply(round(Count / sum(Count) * 100, decimal.percent), function(x) c(x, rep("", length(xlev[[1]]) - 1)))), - Levels = rep(paste0(xlabel, "=", xlev[[1]]), length(label_val)), `Point Estimate` = unlist(lapply(Point.Estimate, function(x) c("Reference", x))), Lower = unlist(lapply(CI, function(x) c("", x[, 1]))), Upper = unlist(lapply(CI, function(x) c("", x[, 2]))), check.names = F - ) %>% - mutate(`P value` = unlist(lapply(pv, function(x) c("", ifelse(x >= 0.001, x, "<0.001")))), `P for interaction` = NA) - - if (!is.null(prop)) { - out <- data.frame( - Variable = unlist(lapply(label_val, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Count = unlist(lapply(Count, function(x) c(x, rep("", length(xlev[[1]]) - 1)))), Percent = unlist(lapply(round(Count / sum(Count) * 100, decimal.percent), function(x) c(x, rep("", length(xlev[[1]]) - 1)))), - Levels = rep(paste0(xlabel, "=", xlev[[1]]), length(label_val)), `Point Estimate` = unlist(lapply(Point.Estimate, function(x) c("Reference", x))), Lower = unlist(lapply(CI, function(x) c("", x[, 1]))), Upper = unlist(lapply(CI, function(x) c("", x[, 2]))), check.names = F - ) %>% - mutate(KM = as.vector(t(prop)), `P value` = unlist(lapply(pv, function(x) c("", ifelse(x >= 0.001, x, "<0.001")))), `P for interaction` = NA) + data <- data + } + count_by_levels<-sort(unique(data[[count_by]]), decreasing = TRUE) + if(!is.null(var_subgroup)){ + subgroup_levels<-unique(data[[var_subgroup]]) + for(countlevel in count_by_levels){ + event_rate_col <- paste0("Count(",count_by, "=", countlevel,")") + original_output <- original_output %>% + tibble::add_column(!!event_rate_col := NA, .after = "Count") + for (level in subgroup_levels) { + value_to_insert <- count_output[count_output[[count_by]] == countlevel & count_output[[var_subgroup]] == level, "Count"] + value_to_insert <- value_to_insert[!is.na(value_to_insert)] + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == level] <- value_to_insert[1] + } } - - rownames(out) <- NULL - - return(rbind(c(var_subgroup, rep(NA, ncol(out) - 2), ifelse(pv_int >= 0.001, pv_int, "<0.001")), out)) + + return(original_output) + } + + else{ + for(countlevel in count_by_levels){ + event_rate_col <- paste0("Count(",count_by, "=", countlevel,")") + original_output <- original_output %>% + tibbble::add_column(!!event_rate_col := NA, .after = "Count") + value_to_insert <- count_output[count_output[[count_by]] == countlevel, "Count"] + value_to_insert <- value_to_insert[!is.na(value_to_insert)] + + original_output[[event_rate_col]][trimws(original_output[["Variable"]]) == "Overall"] <- value_to_insert[1] + } + return(original_output) } } } @@ -547,20 +729,20 @@ TableSubgroupCox <- function(formula, var_subgroup = NULL, var_cov = NULL, data, #' @importFrom magrittr %>% #' @importFrom dplyr bind_rows -TableSubgroupMultiCox <- function(formula, var_subgroups = NULL, var_cov = NULL, data, time_eventrate = 3 * 365, decimal.hr = 2, decimal.percent = 1, decimal.pvalue = 3, line = F, cluster = NULL, strata = NULL, weights = NULL) { +TableSubgroupMultiCox <- function(formula, var_subgroups = NULL, var_cov = NULL, data, time_eventrate = 3 * 365, decimal.hr = 2, decimal.percent = 1, decimal.pvalue = 3, line = F, cluster = NULL, strata = NULL, weights = NULL, event = FALSE, count_by= NULL) { . <- NULL xlabel <- setdiff(as.character(formula)[[3]], "+")[1] - - out.all <- TableSubgroupCox(formula, var_subgroup = NULL, var_cov = var_cov, data = data, time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights) + + out.all <- TableSubgroupCox(formula, var_subgroup = NULL, var_cov = var_cov, data = data, time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights, event = event, count_by = count_by) out.all <- dplyr::mutate_all(out.all, as.character) - + if (is.null(var_subgroups)) { return(out.all) } else { out.list <- lapply(var_subgroups, function(subgroup) { - TableSubgroupCox(formula, var_subgroup = subgroup, var_cov = var_cov, data = data, time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights) + TableSubgroupCox(formula, var_subgroup = subgroup, var_cov = var_cov, data = data, time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, strata = strata, weights = weights, event = event, count_by = count_by) }) - + # out.list <- purrr::map(var_subgroups, ~ TableSubgroupCox(formula, var_subgroup = ., var_cov = var_cov, data = data, time_eventrate = time_eventrate, decimal.hr = decimal.hr, decimal.percent = decimal.percent, decimal.pvalue = decimal.pvalue, cluster = cluster, weights = weights_vec)) if (line) { out.newline <- out.list %>% purrr::map(~ rbind(NA, .)) @@ -573,4 +755,4 @@ TableSubgroupMultiCox <- function(formula, var_subgroups = NULL, var_cov = NULL, return(result) } } -} +} From a92fd2084fa2ad301f41746957199f27a374f52b Mon Sep 17 00:00:00 2001 From: sl-eeper Date: Mon, 11 Nov 2024 08:08:33 +0000 Subject: [PATCH 2/4] add options --- R/CreateTableOneJS.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/CreateTableOneJS.R b/R/CreateTableOneJS.R index aabca0a..b5213cc 100644 --- a/R/CreateTableOneJS.R +++ b/R/CreateTableOneJS.R @@ -213,10 +213,9 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test ptb1[first_row, test_name] <- test_used } } + cols_to_remove <- grep("^test\\(", colnames(ptb1)) + ptb1 <- ptb1[, -cols_to_remove] } - cols_to_remove <- grep("^test\\(", colnames(ptb1)) - ptb1 <- ptb1[, -cols_to_remove] - sig <- ifelse(ptb1[, "p"] == "<0.001", "0", ptb1[, "p"]) sig <- as.numeric(as.vector(sig)) sig <- ifelse(sig <= 0.05, "**", "") @@ -357,7 +356,7 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, Labels = Labels, nonnormal = nonnormal, exact = exact, catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, labeldata = labeldata, minMax = minMax, showpm = showpm, addOverall = addOverall, pairwise = pairwise ) - + cap.tb1 <- paste("Stratified by ", strata, sep = "") if (Labels & !is.null(labeldata)) { @@ -519,3 +518,4 @@ CreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVa return(list(table = ptb1, caption = cap.tb1)) } } + From f853ab0761958ff110fbc23cb079bc557123de9b Mon Sep 17 00:00:00 2001 From: sl-eeper Date: Tue, 12 Nov 2024 01:55:25 +0000 Subject: [PATCH 3/4] edit description, news --- DESCRIPTION | 7 ++++--- NEWS.md | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f0a122a..4c97190 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: jstable Title: Create Tables from Different Types of Regression -Version: 1.3.5 -Date: 2024-10-20 +Version: 1.3.6 +Date: 2024-11-12 Authors@R: c(person("Jinseob", "Kim", email = "jinseob2kim@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9403-605X")), person("Zarathu", role = c("cph", "fnd")), person("Yoonkyoung","Jeon", role = c("aut")), person("Jaehun", "Shon", role = c("aut")), - person("Hyojong", "Myung", role = c("aut")) + person("Hyojong", "Myung", role = c("aut")), + person("Hyungwoo", "Jo", role = c("aut")) ) Description: Create regression tables from generalized linear model(GLM), generalized estimating equation(GEE), generalized linear mixed-effects model(GLMM), Cox proportional hazards model, survey-weighted generalized linear model(svyglm) and survey-weighted Cox model results for publication. Depends: R (>= 3.4.0) diff --git a/NEWS.md b/NEWS.md index b57dc45..e5e1cfb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# jstable 1.3.6 +* Update: Add event, count_by option in TableSubgroupCox, TableSubgroupMultiCox +* Update: Add pairwise option in CreateTableOne2, CreateTableOneJS + # jstable 1.3.5 * Fix: error in `TableSubgroupMultiGLM` when covariates From c9bc6541d76af026fae57dfea5cb916512bc1f32 Mon Sep 17 00:00:00 2001 From: sl-eeper Date: Tue, 12 Nov 2024 03:09:58 +0000 Subject: [PATCH 4/4] svytableone edit --- NEWS.md | 2 +- R/CreateTableOneJS.R | 3 +- R/svyCreateTableOneJS.R | 84 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index e5e1cfb..d4efe51 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # jstable 1.3.6 * Update: Add event, count_by option in TableSubgroupCox, TableSubgroupMultiCox -* Update: Add pairwise option in CreateTableOne2, CreateTableOneJS +* Update: Add pairwise option in CreateTableOne2, CreateTableOneJS, svyCreateTableOne2, svyCreateTableOneJS # jstable 1.3.5 * Fix: error in `TableSubgroupMultiGLM` when covariates diff --git a/R/CreateTableOneJS.R b/R/CreateTableOneJS.R index b5213cc..0d96c7d 100644 --- a/R/CreateTableOneJS.R +++ b/R/CreateTableOneJS.R @@ -31,6 +31,7 @@ #' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. #' @param showpm Logical, show normal distributed continuous variables as Mean ± SD. Default: T #' @param addOverall (optional, only used if strata are supplied) Adds an overall column to the table. Smd and p-value calculations are performed using only the stratifed clolumns. Default: F +#' @param pairwise (optional, only used if strata are supplied) When there are three or more strata, it displays the p-values for pairwise comparisons. Default: F #' @return A matrix object containing what you see is also invisibly returned. This can be assinged a name and exported via write.csv. #' @details DETAILS #' @examples @@ -258,7 +259,7 @@ CreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test #' @param showpm Logical, show normal distributed continuous variables as Mean ± SD. Default: T #' @param addOverall (optional, only used if strata are supplied) Adds an overall column to the table. Smd and p-value calculations are performed using only the stratifed clolumns. Default: F #' @param normalityTest Logical, perform the Shapiro test for all variables. Default: F -#' @return A matrix object containing what you see is also invisibly returned. This can be assinged a name and exported via write.csv. +#' @param pairwise (optional, only used if strata are supplied) When there are three or more strata, it displays the p-values for pairwise comparisons. Default: F#' @return A matrix object containing what you see is also invisibly returned. This can be assinged a name and exported via write.csv. #' @details DETAILS #' @examples #' library(survival) diff --git a/R/svyCreateTableOneJS.R b/R/svyCreateTableOneJS.R index 084ae1b..5e455fe 100644 --- a/R/svyCreateTableOneJS.R +++ b/R/svyCreateTableOneJS.R @@ -22,6 +22,7 @@ #' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. #' @param showpm Logical, show normal distributed continuous variables as Mean ± SD. Default: T #' @param addOverall (optional, only used if strata are supplied) Adds an overall column to the table. Smd and p-value calculations are performed using only the stratifed clolumns. Default: F +#' @param pairwise (optional, only used if strata are supplied) When there are three or more strata, it displays the p-values for pairwise comparisons. Default: F #' @return A matrix object containing what you see is also invisibly returned. This can be assinged a name and exported via write.csv. #' @details DETAILS #' @examples @@ -46,7 +47,7 @@ svyCreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, test = T, showAllLevels = T, printToggle = F, quote = F, smd = F, nonnormal = NULL, catDigits = 1, contDigits = 2, pDigits = 3, Labels = F, labeldata = NULL, minMax = F, showpm = T, - addOverall = F) { + addOverall = F, pairwise = F) { setkey <- variable <- level <- . <- val_label <- NULL if (length(strata) != 1) { @@ -122,6 +123,70 @@ svyCreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, te } } + if (pairwise && length(unique(data$variables[[strata]])) > 2) { + print('enter') + pairwise_comparisons <- combn( + colnames(ptb1)[(which(colnames(ptb1) == "level") + 1):(which(colnames(ptb1) == "p") - 1)], 2, simplify = FALSE) + pairwise_pvalues_list <- list() + for (x in vars) { + pairwise_pvalues_list[[x]] <- list() + is_continuous <- !(x %in% factorVars) && !is.factor(data$variables[[x]]) + for (pair in pairwise_comparisons) { + subset_data <- subset(data, data$variables[[strata]] %in% pair) + if (is_continuous) { + test_result <- if (x %in% nonnormal) { + tryCatch({ + test <- svyranktest(as.formula(paste(x, "~", strata)), design = subset_data) + list(p_value = test$p.value, test_used = "svyranktest") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } else { + tryCatch({ + test <- svyttest(as.formula(paste(x, "~", strata)), design = subset_data) + list(p_value = test$p.value, test_used = "svyttest") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } + } else { + test_result <- tryCatch({ + test <- svychisq(as.formula(paste("~", x, "+", strata)), design = subset_data, method = "RaoScott") + list(p_value = test$p.value, test_used = "svychisq") + }, error = function(e) { + list(p_value = NA, test_used = NA) + }) + } + pairwise_pvalues_list[[x]][[paste(pair, collapse = "_")]] <- test_result + } + } + for (i in seq_along(pairwise_comparisons)) { + col_name <- paste0("p(", pairwise_comparisons[[i]][1], " vs ", pairwise_comparisons[[i]][2], ")") + test_name <- paste0("test(", pairwise_comparisons[[i]][1], " vs ", pairwise_comparisons[[i]][2], ")") + ptb1 <- cbind(ptb1, col_name = "", test_name = "") + colnames(ptb1)[ncol(ptb1) - 1] <- col_name + colnames(ptb1)[ncol(ptb1)] <- test_name + } + for (x in vars) { + cleaned_var_name <- gsub("\\s+|\\(\\%\\)", "", x) + first_row <- which(gsub("\\s+|\\(\\%\\)", "", rownames(ptb1)) == cleaned_var_name)[1] + + for (i in seq_along(pairwise_comparisons)) { + pair_key <- paste(pairwise_comparisons[[i]], collapse = "_") + p_value <- pairwise_pvalues_list[[x]][[pair_key]]$p_value + test_used <- pairwise_pvalues_list[[x]][[pair_key]]$test_used + col_name <- paste0("p(", pairwise_comparisons[[i]][1], " vs ", pairwise_comparisons[[i]][2], ")") + test_name <- paste0("test(", pairwise_comparisons[[i]][1], " vs ", pairwise_comparisons[[i]][2], ")") + p_value <- ifelse(p_value < 0.001, "<0.001", as.character(round(p_value, 2))) + ptb1[first_row, col_name] <- p_value + ptb1[first_row, test_name] <- test_used + } + } + cols_to_remove <- grep("^test\\(", colnames(ptb1)) + ptb1 <- ptb1[, -cols_to_remove] + } + + sig <- ifelse(ptb1[, "p"] == "<0.001", "0", ptb1[, "p"]) sig <- as.numeric(as.vector(sig)) sig <- ifelse(sig <= 0.05, "**", "") @@ -129,7 +194,17 @@ svyCreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, te return(ptb1) } - +nhanes$SDMVPSU <- as.factor(nhanes$SDMVPSU) +nhanesSvy <- svydesign( + ids = ~SDMVPSU, strata = ~SDMVSTRA, weights = ~WTMEC2YR, + nest = TRUE, data = nhanes +) +svyCreateTableOneJS( + vars = c("HI_CHOL", "race", "agecat", "RIAGENDR"), + strata = "race", data = nhanesSvy, + factorVars = c("HI_CHOL", "race", "RIAGENDR"), pairwise = T +) +names(nhanes) #' @title svyCreateTableOneJS: Modified CreateTableOne function in tableone package #' @description Combine svyCreateTableOne & print function in tableone package @@ -154,6 +229,7 @@ svyCreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, te #' @param minMax Whether to use [min,max] instead of [p25,p75] for nonnormal variables. The default is FALSE. #' @param showpm Logical, show normal distributed continuous variables as Mean ± SD. Default: T #' @param addOverall (optional, only used if strata are supplied) Adds an overall column to the table. Smd and p-value calculations are performed using only the stratifed clolumns. Default: F +#' @param pairwise (optional, only used if strata are supplied) When there are three or more strata, it displays the p-values for pairwise comparisons. Default: F #' @return A matrix object containing what you see is also invisibly returned. This can be assinged a name and exported via write.csv. #' @details DETAILS #' @examples @@ -179,7 +255,7 @@ svyCreateTableOne2 <- function(data, strata, vars, factorVars, includeNA = F, te svyCreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, factorVars = NULL, includeNA = F, test = T, showAllLevels = T, printToggle = F, quote = F, smd = F, Labels = F, nonnormal = NULL, catDigits = 1, contDigits = 2, pDigits = 3, labeldata = NULL, psub = T, minMax = F, showpm = T, - addOverall = F) { + addOverall = F, pairwise = F) { . <- level <- variable <- val_label <- V1 <- V2 <- NULL # if (Labels & !is.null(labeldata)){ @@ -231,7 +307,7 @@ svyCreateTableOneJS <- function(vars, strata = NULL, strata2 = NULL, data, facto strata = strata, vars = vars, data = data, factorVars = factorVars, includeNA = includeNA, test = test, smd = smd, showAllLevels = showAllLevels, printToggle = printToggle, quote = quote, Labels = Labels, nonnormal = nonnormal, catDigits = catDigits, contDigits = contDigits, pDigits = pDigits, labeldata = labeldata, minMax = minMax, showpm = showpm, - addOverall = addOverall + addOverall = addOverall, pairwise = pairwise ) cap.tb1 <- paste("Stratified by ", strata, "- weighted data", sep = "")