Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gdr 2516 #149

Merged
merged 14 commits into from
May 17, 2024
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Type: Package
Package: gDRcore
Title: Processing functions and interface to process and analyze drug
dose-response data
Version: 1.1.20
Date: 2024-05-08
Version: 1.1.21
Date: 2024-05-13
Authors@R: c(
person("Bartosz", "Czech", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0002-9908-3007")),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ export(create_and_normalize_SE)
export(data_model)
export(fit_SE)
export(fit_SE.combinations)
export(get_cellline_annotation_from_dt)
export(get_default_nested_identifiers)
export(get_drug_annotation_from_dt)
export(grr_matches)
export(identify_data_type)
export(identify_keys)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRcore 1.1.21 - 2024-05-13
* add functions for retrieving annotation data from input dt

## gDRcore 1.1.20 - 2024-05-08
* fix typo

Expand Down
241 changes: 160 additions & 81 deletions R/add_annotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,23 @@ add_CellLine_annotation <- function(
# Assertions:
checkmate::assert_data_table(dt_metadata)
checkmate::assert_string(fill, null.ok = TRUE)
checkmate::assert_string(externalSource, null.ok = TRUE)
checkmate::assert(checkmate::check_string(externalSource, null.ok = TRUE),
bczech marked this conversation as resolved.
Show resolved Hide resolved
checkmate::check_data_table(externalSource,
col.names = "named"))


cellline <- gDRutils::get_env_identifiers("cellline")
cellline_name <- gDRutils::get_env_identifiers("cellline_name")
add_clid <- gDRutils::get_header("add_clid")

if (all(c(cellline, cellline_name, add_clid) %in% names(dt_metadata))) {
return(dt_metadata)
dt_metadata[, (unlist(add_clid)) := NULL]
}

CLs_info <- if (nchar(externalSource) && file.exists(externalSource)) {
CLs_info <- if (is.character(externalSource) && nchar(externalSource) && file.exists(externalSource)) {
data.table::fread(externalSource)
} else if (data.table::is.data.table(externalSource)) {
externalSource
} else if (annotationPackage == "gDRtestData") {
data.table::fread(
system.file("annotation_data", fname, package = annotationPackage), header = TRUE
Expand Down Expand Up @@ -154,8 +159,9 @@ add_Drug_annotation <- function(
# Assertions:
checkmate::assert_data_table(dt_metadata)
checkmate::assert_string(fill, null.ok = TRUE)
checkmate::assert_string(externalSource, null.ok = TRUE)

checkmate::assert(checkmate::check_string(externalSource, null.ok = TRUE),
checkmate::check_data_table(externalSource,
col.names = "named"))
nrows_df <- nrow(dt_metadata)

drug <- unlist(gDRutils::get_env_identifiers(c(
Expand All @@ -171,86 +177,88 @@ add_Drug_annotation <- function(
if (all(c(drug[["drug"]],
drug_name[["drug_name"]],
drug_moa[["drug_moa"]]) %in% names(dt_metadata))) {
return(dt_metadata)
} else {
drug_full_identifiers <- c(
drug[drug_idx],
drug_name[drug_idx],
drug_moa[drug_idx]
dt_metadata[, (unlist(intersect(c(drug_name, drug_moa), names(dt_metadata)))) := NULL]
}
drug_full_identifiers <- c(
drug[drug_idx],
drug_name[drug_idx],
drug_moa[drug_idx]
)
drug_ids <- stringr::str_extract(names(drug_full_identifiers), "[0-9]")
drug_ids <- ifelse(is.na(drug_ids), 1, drug_ids)
drug_identifiers_list <- split(drug_full_identifiers, drug_ids)
names(drug_identifiers_list) <- drug[drug_idx]

# Read local drugs annotations

Drug_info <- if (is.character(externalSource) && nchar(externalSource) && file.exists(externalSource)) {
data.table::fread(externalSource)
} else if (data.table::is.data.table(externalSource)) {
externalSource
} else if (annotationPackage == "gDRtestData") {
data.table::fread(
system.file("annotation_data", fname, package = annotationPackage), header = TRUE
)
drug_ids <- stringr::str_extract(names(drug_full_identifiers), "[0-9]")
drug_ids <- ifelse(is.na(drug_ids), 1, drug_ids)
drug_identifiers_list <- split(drug_full_identifiers, drug_ids)
names(drug_identifiers_list) <- drug[drug_idx]

# Read local drugs annotations
Drug_info <- if (nchar(externalSource) && file.exists(externalSource)) {
data.table::fread(externalSource)
} else if (annotationPackage == "gDRtestData") {
data.table::fread(
system.file("annotation_data", fname, package = annotationPackage), header = TRUE
)
} else {
eval(parse(text = paste0(annotationPackage,
"::",
"get_drug_annotations")))()
}


Drug_info <- Drug_info[, c("gnumber", "drug_name", "drug_moa"), with = FALSE]
data.table::setnames(Drug_info, c("gnumber", "drug_name", "drug_moa"),
c("drug", "drug_name", "drug_moa"))
drugsTreated <- drugsTreated[!drugsTreated %in% untreated_tag]
validatedDrugs <-
remove_drug_batch(drugsTreated) %in% remove_drug_batch(Drug_info[["drug"]])
#### function should be parallelized
missingTblDrugs <- NULL
if (!is.null(fill) && any(!validatedDrugs)) {
missingTblDrugs <- data.table::data.table(
drug = remove_drug_batch(drugsTreated[!validatedDrugs]),
drug_name = drugsTreated[!validatedDrugs],
drug_moa = fill
)
}

if (nrow(Drug_info) == 0) {
drug_name <- intersect(drug_name, colnames(dt_metadata))
drug <- intersect(drug, colnames(dt_metadata))
dt_metadata[, (drug_name) := get(drug)]
return(dt_metadata)
}
} else {
eval(parse(text = paste0(annotationPackage,
"::",
"get_drug_annotations")))()
}

Drug_info <- rbind(Drug_info, missingTblDrugs)

Drug_info$drug <- remove_drug_batch(Drug_info$drug)
Drug_info <-
rbind(data.table::data.table(
drug = untreated_tag,
drug_name = untreated_tag,
drug_moa = untreated_tag
),
Drug_info)
Drug_info <- Drug_info[!duplicated(Drug_info[["drug"]]), ]
if (any(!remove_drug_batch(drugsTreated) %in% Drug_info$drug) &&
!is.null(missingTblDrugs)) {
Drug_info <- rbind(Drug_info, stats::setNames(
missingTblDrugs[!(remove_drug_batch(missingTblDrugs$drug) %in%
Drug_info$drug), ],
names(Drug_info)
))
}
for (drug_idf in names(drug_identifiers_list)) {
colnames(Drug_info) <- drug_identifiers_list[[drug_idf]]
dt_metadata$batch <- dt_metadata[[drug_idf]]
dt_metadata[[drug_idf]] <- remove_drug_batch(dt_metadata[[drug_idf]])
req_col <- c(drug_idf, setdiff(colnames(dt_metadata), colnames(Drug_info)))
dt_metadata <- Drug_info[dt_metadata[, req_col, with = FALSE], on = drug_idf]
dt_metadata[[drug_idf]] <- dt_metadata$batch
dt_metadata$batch <- NULL
}
stopifnot(nrows_df == nrow(dt_metadata))
dt_metadata
Drug_info <- Drug_info[, c("gnumber", "drug_name", "drug_moa"), with = FALSE]
data.table::setnames(Drug_info, c("gnumber", "drug_name", "drug_moa"),
c("drug", "drug_name", "drug_moa"))
drugsTreated <- drugsTreated[!drugsTreated %in% untreated_tag]
validatedDrugs <-
remove_drug_batch(drugsTreated) %in% remove_drug_batch(Drug_info[["drug"]])
#### function should be parallelized
missingTblDrugs <- NULL
if (!is.null(fill) && any(!validatedDrugs)) {
missingTblDrugs <- data.table::data.table(
drug = remove_drug_batch(drugsTreated[!validatedDrugs]),
drug_name = drugsTreated[!validatedDrugs],
drug_moa = fill
)
}

if (nrow(Drug_info) == 0) {
drug_name <- intersect(drug_name, colnames(dt_metadata))
drug <- intersect(drug, colnames(dt_metadata))
dt_metadata[, (drug_name) := get(drug)]
return(dt_metadata)
}

Drug_info <- rbind(Drug_info, missingTblDrugs)

Drug_info$drug <- remove_drug_batch(Drug_info$drug)
Drug_info <-
rbind(data.table::data.table(
drug = untreated_tag,
drug_name = untreated_tag,
drug_moa = untreated_tag
),
Drug_info)
Drug_info <- Drug_info[!duplicated(Drug_info[["drug"]]), ]
if (any(!remove_drug_batch(drugsTreated) %in% Drug_info$drug) &&
!is.null(missingTblDrugs)) {
Drug_info <- rbind(Drug_info, stats::setNames(
missingTblDrugs[!(remove_drug_batch(missingTblDrugs$drug) %in%
Drug_info$drug), ],
names(Drug_info)
))
}
for (drug_idf in names(drug_identifiers_list)) {
colnames(Drug_info) <- drug_identifiers_list[[drug_idf]]
dt_metadata$batch <- dt_metadata[[drug_idf]]
dt_metadata[[drug_idf]] <- remove_drug_batch(dt_metadata[[drug_idf]])
req_col <- c(drug_idf, setdiff(colnames(dt_metadata), colnames(Drug_info)))
dt_metadata <- Drug_info[dt_metadata[, req_col, with = FALSE], on = drug_idf]
dt_metadata[[drug_idf]] <- dt_metadata$batch
dt_metadata$batch <- NULL
}
stopifnot(nrows_df == nrow(dt_metadata))
dt_metadata
}

#' Remove batch from Gnumber
Expand All @@ -266,3 +274,74 @@ add_Drug_annotation <- function(
remove_drug_batch <- function(drug) {
  # A batch suffix is a dot followed by at least one digit; that suffix and
  # everything after it is dropped. Values without such a suffix are
  # returned unchanged.
  batch_suffix <- "\\.[0-9]+.*"
  gsub(pattern = batch_suffix, replacement = "", x = drug)
}


#' Retrieve the drug annotation from the annotated dt input
#'
#' Collects every drug-related identifier column present in \code{dt}
#' (identifiers whose name contains "drug"), stacks the identifier, name and
#' mechanism-of-action columns of all drugs into three columns and returns
#' the unique rows, excluding rows whose drug identifier is an untreated tag.
#'
#' @param dt annotated data.table; must contain the columns registered for
#' the \code{drug}, \code{drug_name} and \code{drug_moa} identifiers
#'
#' @return data.table with drug annotation, with columns \code{gnumber},
#' \code{drug_name} and \code{drug_moa}
#' @export
#'
#' @examples
#' dt <- data.table::data.table(Gnumber = "A",
#' DrugName = "drugA",
#' drug_moa = "drug_moa_A")
#' get_drug_annotation_from_dt(dt)
get_drug_annotation_from_dt <- function(dt) {
  checkmate::assert_data_table(dt)

  # Keep the drug-related identifiers whose column is present in `dt`.
  # Filtering the named list directly (rather than via `intersect()`) keeps
  # the identifier names, which are used for lookup below.
  identifiers <- gDRutils::get_env_identifiers()
  drug_ids <- identifiers[grep("drug", names(identifiers))]
  is_present <- vapply(
    drug_ids,
    function(id) all(id %in% names(dt)),
    logical(1)
  )
  drug_cols <- drug_ids[is_present]

  annotation_ids <- c("drug", "drug_name", "drug_moa")
  missing_ids <- setdiff(annotation_ids, names(drug_cols))
  if (length(missing_ids) > 0L) {
    stop(
      "'dt' lacks column(s) for drug identifier(s): ",
      toString(missing_ids),
      call. = FALSE
    )
  }
  annotation_cols <- unlist(drug_cols[annotation_ids])

  dt_drug <- dt[, unlist(drug_cols), with = FALSE]

  # One group of measure columns per annotation column: the primary column
  # plus every other column sharing its prefix. Building the list by hand
  # gives the same grouping as `patterns()` without having to attach
  # data.table to the search path.
  measure_cols <- lapply(
    paste0("^", annotation_cols),
    grep,
    x = names(dt_drug)
  )
  dt_long <- data.table::melt(
    dt_drug,
    measure.vars = measure_cols,
    value.name = annotation_cols
  )
  dt_long[, "variable" := NULL]
  data.table::setnames(
    dt_long,
    annotation_cols,
    c("gnumber", "drug_name", "drug_moa")
  )

  unique_dt <- unique(dt_long)
  untreated_tag <- gDRutils::get_env_identifiers("untreated_tag")
  unique_dt[!unique_dt$gnumber %in% untreated_tag, ]
}


#' Retrieve the cell line annotation from the annotated dt input
#'
#' Extracts the cell line identifier column together with the additional
#' cell line header columns from \code{dt}, renames them to the annotation
#' column names and returns the unique rows.
#'
#' @param dt annotated data.table
#'
#' @return data.table with cell line annotation
#' @export
#'
#' @examples
#' dt <- data.table::data.table(Gnumber = "A",
#' clid = "CL123",
#' CellLineName = "cl name",
#' Tissue = "Bone",
#' parental_identifier = "some cl",
#' subtype = "cortical",
#' ReferenceDivisionTime = 5)
#' get_cellline_annotation_from_dt(dt)
get_cellline_annotation_from_dt <- function(dt) {
  checkmate::assert_data_table(dt)

  # Columns as they are named in the annotated input: the cell line
  # identifier followed by the additional cell line headers.
  source_cols <- unlist(c(
    gDRutils::get_env_identifiers("cellline"),
    gDRutils::get_header("add_clid")
  ))

  # Names used in the returned annotation table, matched to `source_cols`
  # by position.
  annotation_cols <- c(
    "cell_line_identifier",
    "cell_line_name",
    "primary_tissue",
    "parental_identifier",
    "subtype",
    "doubling_time"
  )

  annotation <- dt[, source_cols, with = FALSE]
  data.table::setnames(annotation, source_cols, annotation_cols)
  unique(annotation)
}
27 changes: 27 additions & 0 deletions man/get_cellline_annotation_from_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/get_drug_annotation_from_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 18 additions & 12 deletions tests/testthat/test-add_annotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,22 +115,28 @@ test_that("remove_drug_batch works", {
expect_equal(gnumber_without_batch, "DRUG")
})

test_that("add_CellLine_annotation works with custom annotations", {
dt_unknown <- data.table::data.table(ReadoutValue = runif(5),

test_that("get_drug_annotation_from_dt works as expected", {
  # Single-drug input with one extra column that is not a drug identifier.
  annotated_dt <- data.table::data.table(
    Gnumber = "drug_id",
    DrugName = "DrugName",
    drug_moa = "drug_moa",
    some_col = "value"
  )

  drug_annotation <- get_drug_annotation_from_dt(annotated_dt)

  # One unique drug, described by three annotation columns.
  testthat::expect_true(data.table::is.data.table(drug_annotation))
  testthat::expect_equal(dim(drug_annotation), c(1, 3))
})

test_that("get_cellline_annotation_from_dt worksas expected", {
dt_example <- data.table::data.table(ReadoutValue = runif(5),
clid = paste0("CL", 1:5),
CellLineName = paste0("RandomName", 1:5),
Tissue = paste0("Tissue", 1:5),
ReferenceDivisionTime = 1:5,
parental_identifier = 1:5,
subtype = "subtype")
dt_unknown_annotated <- add_CellLine_annotation(dt_unknown)
expect_identical(dt_unknown, dt_unknown_annotated)
subtype = "subtype",
some_col = "value")
annotation <- get_cellline_annotation_from_dt(dt_example)
testthat::expect_true(data.table::is.data.table(annotation))
testthat::expect_equal(dim(annotation), c(5, 6))
})

test_that("add_Drug_annotation works with custom annotations", {
dt_unknown <- data.table::data.table(Gnumber = "drug_id",
DrugName = "DrugName",
drug_moa = "drug_moa")
dt_unknown_annotated <- add_Drug_annotation(dt_unknown)
expect_identical(dt_unknown, dt_unknown_annotated)
})
Loading