diff --git a/DESCRIPTION b/DESCRIPTION index 90b9cf61..da2068b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: gDRutils Type: Package Title: A package with helper functions for processing drug response data -Version: 1.3.13 -Date: 2024-09-16 +Version: 1.3.14 +Date: 2024-10-03 Authors@R: c(person("Bartosz", "Czech", role=c("aut"), comment = c(ORCID = "0000-0002-9908-3007")), person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="gladki.arkadiusz@gmail.com", diff --git a/NEWS.md b/NEWS.md index b0cd85c0..d6c0d80e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRutils 1.3.14 - 2024-10-03 +* fixed issue in average_biological_replicates_dt (fit_type) + ## gDRutils 1.3.13 - 2024-09-16 * add functions set_unique_cl_names_dt and set_unique_drug_names_dt diff --git a/R/headers_list.R b/R/headers_list.R index 995c035d..df09f265 100644 --- a/R/headers_list.R +++ b/R/headers_list.R @@ -191,8 +191,20 @@ "GR50", "GEC50", "IC50", - "EC50" - ) + "EC50", + "GR_xc50", + "RV_xc50", + "GR_ec50", + "RV_ec50" + ), + fit_type = c( + "fit_type", + "Fit Type", + "Fit Type RV", + "Fit Type GR", + "RV_fit_type", + "GR_fit_type" + ) ) } diff --git a/R/utils.R b/R/utils.R index 007a1cb7..85895572 100644 --- a/R/utils.R +++ b/R/utils.R @@ -433,6 +433,8 @@ geometric_mean <- function(x, fixed = TRUE, maxlog10Concentration = 1) { #' @param fixed Flag indicating whether to add a fix for -Inf in the geometric mean. #' @param geometric_average_fields Character vector of column names in \code{dt} #' to take the geometric average of. +#' @param fit_type_average_fields Character vector of column names in \code{dt} +#' that should be treated as a column with fit type data #' @param add_sd Flag indicating whether to add standard deviation and count columns. 
#' #' @examples @@ -449,8 +451,16 @@ average_biological_replicates_dt <- function( prettified = FALSE, fixed = TRUE, geometric_average_fields = get_header("metric_average_fields")$geometric_mean, + fit_type_average_fields = get_header("metric_average_fields")$fit_type, add_sd = FALSE) { + checkmate::assert_data_table(dt) + checkmate::assert_string(var) + checkmate::assert_flag(prettified) + checkmate::assert_character(geometric_average_fields) + checkmate::assert_character(fit_type_average_fields) + checkmate::assert_flag(add_sd) + data <- data.table::copy(dt) if (prettified) { @@ -465,7 +475,8 @@ average_biological_replicates_dt <- function( average_fields <- setdiff(names(Filter(is.numeric, data)), c(unlist(pidfs), var, iso_cols)) geometric_average_fields <- intersect(geometric_average_fields, names(dt)) - group_by <- setdiff(names(data), c(average_fields, var, id_cols, "fit_type", "Fit Type")) + fit_type_average_fields <- intersect(fit_type_average_fields, names(dt)) + group_by <- setdiff(names(data), c(average_fields, var, id_cols, fit_type_average_fields)) if (add_sd) { # Calculate standard deviation for both average_fields and geometric_average_fields diff --git a/man/average_biological_replicates_dt.Rd b/man/average_biological_replicates_dt.Rd index 8f712fa6..ba726af1 100644 --- a/man/average_biological_replicates_dt.Rd +++ b/man/average_biological_replicates_dt.Rd @@ -10,6 +10,7 @@ average_biological_replicates_dt( prettified = FALSE, fixed = TRUE, geometric_average_fields = get_header("metric_average_fields")$geometric_mean, + fit_type_average_fields = get_header("metric_average_fields")$fit_type, add_sd = FALSE ) } @@ -25,6 +26,9 @@ average_biological_replicates_dt( \item{geometric_average_fields}{Character vector of column names in \code{dt} to take the geometric average of.} +\item{fit_type_average_fields}{Character vector of column names in \code{dt} +that should be treated as a column with fit type data} + \item{add_sd}{Flag indicating whether to add 
standard deviation and count columns.} } \value{ diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 11f775fb..143fa9f4 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -223,6 +223,36 @@ test_that("average_biological_replicates_dt works as expected", { expect_equal(dim(avg_metrics_data2), c(40, 44)) expect_equal(sum(grepl("_sd", names(avg_metrics_data2))), 15) expect_true("count" %in% names(avg_metrics_data2)) + + # protection against regression + # fit_type correctly recognized in wide and long format + sdata <- get_synthetic_data("finalMAE_small") + smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") + tdata <- smetrics_data[1:8, ] + tdata$Gnumber <- tdata$Gnumber[1] + tdata$DrugName <- tdata$DrugName[1] + tdata$source_id <- paste0("DS", rep(1:4, each = 2)) + tdata$fit_type <- letters[1:8] + + av1b <- average_biological_replicates_dt(tdata, var = "source_id") + av1f <- gDRutils::flatten( + av1b, + groups = c("normalization_type", "fit_source"), + wide_cols = gDRutils::get_header("response_metrics") + ) + + av2f <- gDRutils::flatten( + tdata, + groups = c("normalization_type", "fit_source"), + wide_cols = gDRutils::get_header("response_metrics") + ) + av2b <- average_biological_replicates_dt(av2f, var = "source_id") + expect_true(all.equal(av1f, av2b)) + expect_true(nrow(av1f) == 1) + av1i <- average_biological_replicates_dt(tdata, var = "source_id", fit_type_average_fields = "bad_value") + expect_true(nrow(av1i) == 8) + + }) test_that("get_duplicated_rows works as expected", {