From 313355c49e79f7b7d7d4e2d1505f7f83abea92db Mon Sep 17 00:00:00 2001 From: Arkadiusz Gladki Date: Fri, 11 Oct 2024 13:48:26 +0200 Subject: [PATCH 1/4] feat: make duplicates' helpers supporting combo assays as well --- DESCRIPTION | 4 +-- NEWS.md | 3 ++ R/identifiers_list.R | 8 +++-- inst/settings.json | 3 +- tests/testthat/test-duplicates.R | 52 ++++++++++++++++++++------------ 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c236715e..65105279 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: gDRutils Type: Package Title: A package with helper functions for processing drug response data -Version: 1.3.15 -Date: 2024-10-07 +Version: 1.3.16 +Date: 2024-10-11 Authors@R: c(person("Bartosz", "Czech", role=c("aut"), comment = c(ORCID = "0000-0002-9908-3007")), person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="gladki.arkadiusz@gmail.com", diff --git a/NEWS.md b/NEWS.md index 149fe90b..b7ede461 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRutils 1.3.16 - 2024-10-11 +* make duplicates' helpers supporting combo assays as well + ## gDRutils 1.3.15 - 2024-10-07 * refactor the logic for dealing with duplicates in assay data diff --git a/R/identifiers_list.R b/R/identifiers_list.R index b003a290..0ba580b9 100644 --- a/R/identifiers_list.R +++ b/R/identifiers_list.R @@ -43,7 +43,10 @@ IDENTIFIERS_LIST <- list( data_source = "data_source", # replicate - replicate = "Replicate" + replicate = "Replicate", + + # normalization type + normalization_type = "normalization_type" ) REQ_COL_IDENTIFIERS <- c( @@ -79,7 +82,8 @@ EXPECT_ONE_IDENTIFIERS <- c( "drug_name3", "drug_moa3", "concentration3", - "data_source" + "data_source", + "normalization_type" ) SYNONYMS_LIST <- list( diff --git a/inst/settings.json b/inst/settings.json index 1ed6d4c5..f0e158d3 100644 --- a/inst/settings.json +++ b/inst/settings.json @@ -29,6 +29,7 @@ "cellline_name", "concentration2", "duration", - "data_source" + "data_source", + "normalization_type" ] } diff --git a/tests/testthat/test-duplicates.R b/tests/testthat/test-duplicates.R index 37f870d6..9930817f 100644 --- a/tests/testthat/test-duplicates.R +++ b/tests/testthat/test-duplicates.R @@ -52,21 +52,40 @@ test_that("get_duplicated_rows works as expected", { }) test_that("[has|get]_assay_dt_duplicated_rows works as expected", { - + + # single-agent data sdata <- get_synthetic_data("finalMAE_small") smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") - smetrics_data_f <- gDRutils::flatten( - smetrics_data, - groups = c("normalization_type", "fit_source"), - wide_cols = gDRutils::get_header("response_metrics") + smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) + expect_false(has_assay_dt_duplicated_rows(smetrics_data)) + expect_true(has_assay_dt_duplicated_rows(smetrics_dup_data)) + + expect_equal(get_assay_dt_duplicated_rows(smetrics_dup_data), + c(1:10, 201:210)) + expect_equal( + rbind(smetrics_data[1:10, ], smetrics_data[1:10, ]), + get_assay_dt_duplicated_rows(smetrics_dup_data, output = "data") ) - expect_true(has_assay_dt_duplicated_rows(smetrics_data)) - expect_false(has_assay_dt_duplicated_rows(smetrics_data_f)) + expect_equal(get_assay_dt_duplicated_rows(smetrics_data), integer(0)) + empty_dt <- get_assay_dt_duplicated_rows(smetrics_data, output = "data") + expect_true(nrow(empty_dt) == 0) + expect_is(empty_dt, "data.table") - expect_equal(get_assay_dt_duplicated_rows(smetrics_data), 1:200) - expect_equal(dim(smetrics_data), dim(get_assay_dt_duplicated_rows(smetrics_data, output = "data"))) - expect_equal(get_assay_dt_duplicated_rows(smetrics_data_f), integer(0)) - empty_dt <- get_assay_dt_duplicated_rows(smetrics_data_f, output = "data") + # combo data + cdata <- get_synthetic_data("finalMAE_combo_matrix_small") + cscores_data <- convert_se_assay_to_dt(cdata[[1]], "scores") + cscores_dup_data <- rbind(cscores_data, cscores_data[1:10, ]) + expect_false(has_assay_dt_duplicated_rows(cscores_data)) + expect_true(has_assay_dt_duplicated_rows(cscores_dup_data)) + + expect_equal(get_assay_dt_duplicated_rows(cscores_dup_data), + c(1:10, 25:34)) + expect_equal( + rbind(cscores_data[1:10, ], cscores_data[1:10, ]), + get_assay_dt_duplicated_rows(cscores_dup_data, output = "data") + ) + expect_equal(get_assay_dt_duplicated_rows(cscores_data), integer(0)) + empty_dt <- get_assay_dt_duplicated_rows(cscores_data, output = "data") expect_true(nrow(empty_dt) == 0) expect_is(empty_dt, "data.table") }) @@ -75,14 +94,9 @@ test_that("throw_msg_if_duplicates works as expected", { sdata <- get_synthetic_data("finalMAE_small") smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") - smetrics_data_f <- gDRutils::flatten( - smetrics_data, - groups = c("normalization_type", "fit_source"), - wide_cols = gDRutils::get_header("response_metrics") - ) - + smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) exp_msg <- "rows are duplicated" - expect_error(throw_msg_if_duplicates(smetrics_data, "Metrics"), exp_msg) - expect_warning(throw_msg_if_duplicates(smetrics_data, "Metrics", msg_f = warning), exp_msg) + expect_error(throw_msg_if_duplicates(smetrics_dup_data, "Metrics"), exp_msg) + expect_warning(throw_msg_if_duplicates(smetrics_dup_data, "Metrics", msg_f = warning), exp_msg) }) From 304bb0219f13d98adecb82b6530a08ef884d7841 Mon Sep 17 00:00:00 2001 From: Arek Gladki <41166437+gladkia@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:54:12 +0200 Subject: [PATCH 2/4] Update tests/testthat/test-duplicates.R Co-authored-by: j-smola <31825957+j-smola@users.noreply.github.com> --- tests/testthat/test-duplicates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-duplicates.R b/tests/testthat/test-duplicates.R index 9930817f..5cd6d28b 100644 --- a/tests/testthat/test-duplicates.R +++ b/tests/testthat/test-duplicates.R @@ -55,7 +55,7 @@ test_that("[has|get]_assay_dt_duplicated_rows works as expected", { # single-agent data sdata <- get_synthetic_data("finalMAE_small") - smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") + smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics") smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) expect_false(has_assay_dt_duplicated_rows(smetrics_data)) expect_true(has_assay_dt_duplicated_rows(smetrics_dup_data)) From 8f09b745f44359a5daa7f8c305ae0fb97fcc9bc5 Mon Sep 17 00:00:00 2001 From: Arek Gladki <41166437+gladkia@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:54:35 +0200 Subject: [PATCH 3/4] Update tests/testthat/test-duplicates.R Co-authored-by: j-smola <31825957+j-smola@users.noreply.github.com> --- tests/testthat/test-duplicates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-duplicates.R b/tests/testthat/test-duplicates.R index 5cd6d28b..3d85d64c 100644 --- a/tests/testthat/test-duplicates.R +++ b/tests/testthat/test-duplicates.R @@ -93,7 +93,7 @@ test_that("[has|get]_assay_dt_duplicated_rows works as expected", { test_that("throw_msg_if_duplicates works as expected", { sdata <- get_synthetic_data("finalMAE_small") - smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") + smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics") smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) exp_msg <- "rows are duplicated" From 4adf661f8874ffdf888a392f32e8140badf4d1f0 Mon Sep 17 00:00:00 2001 From: Arek Gladki <41166437+gladkia@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:55:01 +0200 Subject: [PATCH 4/4] Update tests/testthat/test-duplicates.R Co-authored-by: j-smola <31825957+j-smola@users.noreply.github.com> --- tests/testthat/test-duplicates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-duplicates.R b/tests/testthat/test-duplicates.R index 3d85d64c..e25ca90e 100644 --- a/tests/testthat/test-duplicates.R +++ b/tests/testthat/test-duplicates.R @@ -73,7 +73,7 @@ test_that("[has|get]_assay_dt_duplicated_rows works as expected", { # combo data cdata <- get_synthetic_data("finalMAE_combo_matrix_small") - cscores_data <- convert_se_assay_to_dt(cdata[[1]], "scores") + cscores_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "scores") cscores_dup_data <- rbind(cscores_data, cscores_data[1:10, ]) expect_false(has_assay_dt_duplicated_rows(cscores_data)) expect_true(has_assay_dt_duplicated_rows(cscores_dup_data))