Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

108 create rl map cellphonedb input checks and readins #109

Merged
merged 7 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 19 additions & 73 deletions R/import_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,90 +29,36 @@ NULL
create_rl_map_cellphonedb <- function(
genes, proteins, interactions, complexes = NULL, database_name = "CellPhoneDB",
gene_conv = NULL, gene_conv_host = "https://www.ensembl.org", alternate_convert = FALSE, alternate_convert_table = NULL) {

# Check input structures:
stopifnot(`genes argument must be file path or dataframe` = (is(genes, "data.frame") | is(
genes,
"character"
)))
stopifnot(`proteins argument must be file path or dataframe` = (is(proteins, "data.frame") | is(
proteins,
"character"
)))
stopifnot(`interactions argument must be file path or dataframe` = (is(interactions, "data.frame") |
is(interactions, "character")))
stopifnot(`complexes argument must be NULL, file path or dataframe` = (is.null(complexes) | is(
complexes,
"data.frame"
) | is(complexes, "character")))
stopifnot(`Database name must be a string` = is(database_name, "character") & length(database_name) ==
1)
stopifnot(`Gene conversion must be NULL or a character vector with 2 items` = (is.null(gene_conv) |
(is(gene_conv, "character") & length(gene_conv) == 2)))
stopifnot(`Gene conversion host must be a string` = is(gene_conv_host, "character") & length(gene_conv_host) ==
1)
stopifnot(`Alternate conversion argument (not recommended) must be TRUE or FALSE` = is(
alternate_convert,
"logical"
))
if (alternate_convert & is.null(alternate_convert_table)) {
stop("If using alternate conversion table (not recommended), a table must be provided")
}
check_arg(genes, c("character", "data.frame"))
Copy link

@drbergman drbergman Mar 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the read_if_char function, called on genes below, requires genes to be a single character string when it is not a data frame. Should this check therefore also include allow_len = c(1), as in the checks below?

The same applies to the next few lines.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

valid point, addressed in bd01e01

check_arg(proteins, c("character", "data.frame"))
check_arg(interactions, c("character", "data.frame"))
check_arg(complexes, c("character", "data.frame", "NULL"))
check_arg(database_name, c("character"), allow_len = c(1))
check_arg(gene_conv, c("NULL", "character"), allow_len = c(0, 2))
check_arg(gene_conv_host, c("character"), allow_len = c(1))

# Read in files if needed:
if (is(genes, "character")) {
genes <- read.csv(genes, stringsAsFactors = FALSE)
}
if (is(proteins, "character")) {
proteins <- read.csv(proteins, stringsAsFactors = FALSE)
}
if (is(interactions, "character")) {
interactions <- read.csv(interactions, stringsAsFactors = FALSE)
}
if (is(complexes, "character")) {
complexes <- read.csv(complexes, stringsAsFactors = FALSE)
}
genes <- read_if_char(genes)
proteins <- read_if_char(proteins)
interactions <- read_if_char(interactions)
complexes <- read_if_char(complexes)

# Replace empty cells in columns annotating gene properties with 'False'. There are some
# unannotated genes in database v2.0 that seem to have been fixed in v4.0.
gene_features <- c(
"transmembrane", "peripheral", "secreted", "secreted_highlight", "receptor",
"integrin", "other"
)
proteins[proteins$receptor == "", colnames(proteins) %in% gene_features] <- "False"

# change cases of True/False syntax from Python to TRUE/FALSE R syntax
for (x in colnames(genes)) {
if (identical(unique(genes[[x]]), c("True", "False")) | identical(unique(genes[[x]]), c(
"False",
"True"
))) {
genes[[x]] <- ifelse(genes[[x]] == "True", TRUE, FALSE)
}
}
for (x in colnames(proteins)) {
if (identical(unique(proteins[[x]]), c("True", "False")) | identical(
unique(proteins[[x]]),
c("False", "True")
)) {
proteins[[x]] <- ifelse(proteins[[x]] == "True", TRUE, FALSE)
}
}
for (x in colnames(interactions)) {
if (identical(unique(interactions[[x]]), c("True", "False")) | identical(
unique(interactions[[x]]),
c("False", "True")
)) {
interactions[[x]] <- ifelse(interactions[[x]] == "True", TRUE, FALSE)
}
}
if (!is.null(complexes)) {
for (x in colnames(complexes)) {
if (identical(unique(complexes[[x]]), c("True", "False")) | identical(
unique(complexes[[x]]),
c("False", "True")
)) {
complexes[[x]] <- ifelse(complexes[[x]] == "True", TRUE, FALSE)
}
}
}
genes <- conv_py_bools(genes)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any concern that some of these columns might contain only True values or only False values? It looks like conv_py_bools only makes changes when both values are present.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmitchell81 could there be cases with only True or only False that we need to care about?

proteins <- conv_py_bools(proteins)
interactions <- conv_py_bools(interactions)
complexes <- conv_py_bools(complexes)

# gene conversions
if (!is.null(gene_conv) & !identical(gene_conv[1], gene_conv[2])) {
# obtain conversion dictionary
Expand Down
1 change: 1 addition & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ check_arg <- function(arg, allow_class = NULL, allow_len = NULL,
#' @return obj Object itself in case it's not a character
read_if_char <- function(obj) {
if (is(obj, "character")) {
check_arg(obj, allow_class = "character", allow_len = 1)
obj <- read.csv(obj, stringsAsFactors = FALSE)
}
return(obj)
Expand Down
15 changes: 8 additions & 7 deletions tests/testthat/test-import_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,37 +81,38 @@ test_that("create_rl_map_cellphonedb fails on wrong input arg type.", {
expect_error(create_rl_map_cellphonedb(
genes = list(), proteins = proteins_tiny,
interactions = interactions_tiny, complexes = complexes_tiny
))
), "Class of genes must be one of: character,data.frame")

expect_error(create_rl_map_cellphonedb(
genes = genes_tiny, proteins = list(),
interactions = interactions_tiny, complexes = complexes_tiny
))
), "Class of proteins must be one of: character,data.frame")

expect_error(create_rl_map_cellphonedb(
genes = genes_tiny, proteins = proteins_tiny,
interactions = list(), complexes = complexes_tiny
))
), "Class of interactions must be one of: character,data.frame")

expect_error(create_rl_map_cellphonedb(
genes = genes_tiny, proteins = proteins_tiny,
interactions = interactions_tiny, complexes = list()
))
), "Class of complexes must be one of: character,data.frame")

expect_error(create_rl_map_cellphonedb(
genes = genes_tiny, proteins = proteins_tiny,
interactions = interactions_tiny, complexes = complexes_tiny,
database_name = list()
))
), "Class of database_name must be one of: character")

expect_error(create_rl_map_cellphonedb(
genes = genes_tiny, proteins = proteins_tiny,
interactions = interactions_tiny, complexes = complexes_tiny,
database_name = c("length", ">1")
))
), "Length of database_name must be one of: 1")
})

test_that("create_rl_map_cellphonedb fails on wrong input arg type.", {

test_that("create_domino fails on wrong input arg type.", {
#bad rl map
bad_rl_map <- "rl_map"
expect_error(create_domino(bad_rl_map,
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ test_that("bool conversion function works",{
test_that("read if char tries to read a file", {
  # The expected message must be an argument of expect_error(), not of
  # read_if_char(). read_if_char() accepts a single argument, so passing the
  # message to it fails with an "unused argument" error and the expectation
  # passes without ever attempting to open the file.
  expect_error(
    read_if_char("./file_that_not_exists.csv"),
    "cannot open the connection"
  )
  # A character vector with more than one element is rejected by the length
  # check inside read_if_char() before any file is read.
  expect_error(read_if_char(c("a", "b")), "Length of obj must be one of: 1")
})

test_that("mandatory field absence yields error, presence does not", {
Expand Down
Loading