Skip to content

Commit

Permalink
Merge pull request #135 from gdrplatform/GDR-2591
Browse files Browse the repository at this point in the history
feat: make duplicates' helpers support combo assays as well
  • Loading branch information
gladkia authored Oct 17, 2024
2 parents ed34794 + 4adf661 commit a45d014
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 26 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: gDRutils
Type: Package
Title: A package with helper functions for processing drug response data
Version: 1.3.15
Date: 2024-10-07
Version: 1.3.16
Date: 2024-10-11
Authors@R: c(person("Bartosz", "Czech", role=c("aut"),
comment = c(ORCID = "0000-0002-9908-3007")),
person("Arkadiusz", "Gladki", role=c("cre", "aut"), email="[email protected]",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRutils 1.3.16 - 2024-10-11
* make duplicates' helpers support combo assays as well

## gDRutils 1.3.15 - 2024-10-07
* refactor the logic for dealing with duplicates in assay data

Expand Down
8 changes: 6 additions & 2 deletions R/identifiers_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ IDENTIFIERS_LIST <- list(
data_source = "data_source",

# replicate
replicate = "Replicate"
replicate = "Replicate",

# normalization type
normalization_type = "normalization_type"
)

REQ_COL_IDENTIFIERS <- c(
Expand Down Expand Up @@ -79,7 +82,8 @@ EXPECT_ONE_IDENTIFIERS <- c(
"drug_name3",
"drug_moa3",
"concentration3",
"data_source"
"data_source",
"normalization_type"
)

SYNONYMS_LIST <- list(
Expand Down
3 changes: 2 additions & 1 deletion inst/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"cellline_name",
"concentration2",
"duration",
"data_source"
"data_source",
"normalization_type"
]
}
56 changes: 35 additions & 21 deletions tests/testthat/test-duplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,37 +52,51 @@ test_that("get_duplicated_rows works as expected", {
})

test_that("[has|get]_assay_dt_duplicated_rows works as expected", {


# single-agent data
sdata <- get_synthetic_data("finalMAE_small")
smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics")
smetrics_data_f <- gDRutils::flatten(
smetrics_data,
groups = c("normalization_type", "fit_source"),
wide_cols = gDRutils::get_header("response_metrics")
smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics")
smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ])
expect_false(has_assay_dt_duplicated_rows(smetrics_data))
expect_true(has_assay_dt_duplicated_rows(smetrics_dup_data))

expect_equal(get_assay_dt_duplicated_rows(smetrics_dup_data),
c(1:10, 201:210))
expect_equal(
rbind(smetrics_data[1:10, ], smetrics_data[1:10, ]),
get_assay_dt_duplicated_rows(smetrics_dup_data, output = "data")
)
expect_true(has_assay_dt_duplicated_rows(smetrics_data))
expect_false(has_assay_dt_duplicated_rows(smetrics_data_f))
expect_equal(get_assay_dt_duplicated_rows(smetrics_data), integer(0))
empty_dt <- get_assay_dt_duplicated_rows(smetrics_data, output = "data")
expect_true(nrow(empty_dt) == 0)
expect_is(empty_dt, "data.table")

expect_equal(get_assay_dt_duplicated_rows(smetrics_data), 1:200)
expect_equal(dim(smetrics_data), dim(get_assay_dt_duplicated_rows(smetrics_data, output = "data")))
expect_equal(get_assay_dt_duplicated_rows(smetrics_data_f), integer(0))
empty_dt <- get_assay_dt_duplicated_rows(smetrics_data_f, output = "data")
# combo data
cdata <- get_synthetic_data("finalMAE_combo_matrix_small")
cscores_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "scores")
cscores_dup_data <- rbind(cscores_data, cscores_data[1:10, ])
expect_false(has_assay_dt_duplicated_rows(cscores_data))
expect_true(has_assay_dt_duplicated_rows(cscores_dup_data))

expect_equal(get_assay_dt_duplicated_rows(cscores_dup_data),
c(1:10, 25:34))
expect_equal(
rbind(cscores_data[1:10, ], cscores_data[1:10, ]),
get_assay_dt_duplicated_rows(cscores_dup_data, output = "data")
)
expect_equal(get_assay_dt_duplicated_rows(cscores_data), integer(0))
empty_dt <- get_assay_dt_duplicated_rows(cscores_data, output = "data")
expect_true(nrow(empty_dt) == 0)
expect_is(empty_dt, "data.table")
})

test_that("throw_msg_if_duplicates works as expected", {

sdata <- get_synthetic_data("finalMAE_small")
smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics")
smetrics_data_f <- gDRutils::flatten(
smetrics_data,
groups = c("normalization_type", "fit_source"),
wide_cols = gDRutils::get_header("response_metrics")
)

smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics")
smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ])

exp_msg <- "rows are duplicated"
expect_error(throw_msg_if_duplicates(smetrics_data, "Metrics"), exp_msg)
expect_warning(throw_msg_if_duplicates(smetrics_data, "Metrics", msg_f = warning), exp_msg)
expect_error(throw_msg_if_duplicates(smetrics_dup_data, "Metrics"), exp_msg)
expect_warning(throw_msg_if_duplicates(smetrics_dup_data, "Metrics", msg_f = warning), exp_msg)
})

0 comments on commit a45d014

Please sign in to comment.