From db401c0fe5c31c459ab618bc9020c69058580f0c Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Wed, 13 Mar 2024 11:37:28 -0400 Subject: [PATCH 1/5] rewrite input checks --- R/import_fxns.R | 35 +++++++------------------------ tests/testthat/test-import_fxns.R | 12 +++++------ 2 files changed, 14 insertions(+), 33 deletions(-) diff --git a/R/import_fxns.R b/R/import_fxns.R index 1a9b751..7a839f4 100644 --- a/R/import_fxns.R +++ b/R/import_fxns.R @@ -29,34 +29,15 @@ NULL create_rl_map_cellphonedb <- function( genes, proteins, interactions, complexes = NULL, database_name = "CellPhoneDB", gene_conv = NULL, gene_conv_host = "https://www.ensembl.org", alternate_convert = FALSE, alternate_convert_table = NULL) { + # Check input structures: - stopifnot(`genes argument must be file path or dataframe` = (is(genes, "data.frame") | is( - genes, - "character" - ))) - stopifnot(`proteins argument must be file path or dataframe` = (is(proteins, "data.frame") | is( - proteins, - "character" - ))) - stopifnot(`interactions argument must be file path or dataframe` = (is(interactions, "data.frame") | - is(interactions, "character"))) - stopifnot(`complexes argument must be NULL, file path or dataframe` = (is.null(complexes) | is( - complexes, - "data.frame" - ) | is(complexes, "character"))) - stopifnot(`Database name must be a string` = is(database_name, "character") & length(database_name) == - 1) - stopifnot(`Gene conversion must be NULL or a character vector with 2 items` = (is.null(gene_conv) | - (is(gene_conv, "character") & length(gene_conv) == 2))) - stopifnot(`Gene conversion host must be a string` = is(gene_conv_host, "character") & length(gene_conv_host) == - 1) - stopifnot(`Alternate conversion argument (not recommended) must be TRUE or FALSE` = is( - alternate_convert, - "logical" - )) - if (alternate_convert & is.null(alternate_convert_table)) { - stop("If using alternate conversion table (not recommended), a table must be provided") - } + check_arg(genes, c("character", "data.frame")) + check_arg(proteins, c("character", "data.frame")) + check_arg(interactions, c("character", "data.frame")) + check_arg(complexes, c("character", "data.frame", "NULL")) + check_arg(database_name, c("character"), allow_len = c(1)) + check_arg(gene_conv, c("NULL", "character"), allow_len = c(0, 2)) + check_arg(gene_conv_host, c("character"), allow_len = c(1)) # Read in files if needed: if (is(genes, "character")) { diff --git a/tests/testthat/test-import_fxns.R b/tests/testthat/test-import_fxns.R index 014715e..058b401 100644 --- a/tests/testthat/test-import_fxns.R +++ b/tests/testthat/test-import_fxns.R @@ -81,34 +81,34 @@ test_that("create_rl_map_cellphonedb fails on wrong input arg type.", { expect_error(create_rl_map_cellphonedb( genes = list(), proteins = proteins_tiny, interactions = interactions_tiny, complexes = complexes_tiny - )) + ), "Class of genes must be one of: character,data.frame") expect_error(create_rl_map_cellphonedb( genes = genes_tiny, proteins = list(), interactions = interactions_tiny, complexes = complexes_tiny - )) + ), "Class of proteins must be one of: character,data.frame") expect_error(create_rl_map_cellphonedb( genes = genes_tiny, proteins = proteins_tiny, interactions = list(), complexes = complexes_tiny - )) + ), "Class of interactions must be one of: character,data.frame") expect_error(create_rl_map_cellphonedb( genes = genes_tiny, proteins = proteins_tiny, interactions = interactions_tiny, complexes = list() - )) + ), "Class of complexes must be one of: character,data.frame") expect_error(create_rl_map_cellphonedb( genes = genes_tiny, proteins = proteins_tiny, interactions = interactions_tiny, complexes = complexes_tiny, database_name = list() - )) + ), "Class of database_name must be one of: character") expect_error(create_rl_map_cellphonedb( genes = genes_tiny, proteins = proteins_tiny, interactions = interactions_tiny, complexes = complexes_tiny, database_name = c("length", ">1") - )) + ), "Length of database_name must be one of: 1") }) From 9c3083ab58373dafc8e5b63c4e42b6b9f054fde4 Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Wed, 13 Mar 2024 11:38:05 -0400 Subject: [PATCH 2/5] fix copypaste error --- tests/testthat/test-import_fxns.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-import_fxns.R b/tests/testthat/test-import_fxns.R index 058b401..e5acbad 100644 --- a/tests/testthat/test-import_fxns.R +++ b/tests/testthat/test-import_fxns.R @@ -112,7 +112,7 @@ test_that("create_rl_map_cellphonedb fails on wrong input arg type.", { }) -test_that("create_rl_map_cellphonedb fails on wrong input arg type.", { +test_that("create_domino fails on wrong input arg type.", { #bad rl map bad_rl_map <- "rl_map" expect_error(create_domino(bad_rl_map, From 4efa5f1651ae7d87f6db324f3b5acbf53d9dc00b Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Wed, 13 Mar 2024 11:40:54 -0400 Subject: [PATCH 3/5] rewrite readins --- R/import_fxns.R | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/R/import_fxns.R b/R/import_fxns.R index 7a839f4..25e85b6 100644 --- a/R/import_fxns.R +++ b/R/import_fxns.R @@ -40,18 +40,11 @@ create_rl_map_cellphonedb <- function( check_arg(gene_conv_host, c("character"), allow_len = c(1)) # Read in files if needed: - if (is(genes, "character")) { - genes <- read.csv(genes, stringsAsFactors = FALSE) - } - if (is(proteins, "character")) { - proteins <- read.csv(proteins, stringsAsFactors = FALSE) - } - if (is(interactions, "character")) { - interactions <- read.csv(interactions, stringsAsFactors = FALSE) - } - if (is(complexes, "character")) { - complexes <- read.csv(complexes, stringsAsFactors = FALSE) - } + genes <- read_if_char(genes) + proteins <- read_if_char(proteins) + interactions <- read_if_char(interactions) + complexes <- read_if_char(complexes) + # replace empty cells in columns annotating gene properties with 'False' There are some # unannotated genes in database v2.0 that seem to have been fixed in v4.0 gene_features <- c( From 5fd637f28aed5e55917b469a48d0136d58e1adc8 Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Wed, 13 Mar 2024 11:43:34 -0400 Subject: [PATCH 4/5] rewrite python/R bool conversion --- R/import_fxns.R | 40 ++++++---------------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/R/import_fxns.R b/R/import_fxns.R index 25e85b6..04ce89c 100644 --- a/R/import_fxns.R +++ b/R/import_fxns.R @@ -52,41 +52,13 @@ create_rl_map_cellphonedb <- function( "integrin", "other" ) proteins[proteins$receptor == "", colnames(proteins) %in% gene_features] <- "False" + # change cases of True/False syntax from Python to TRUE/FALSE R syntax - for (x in colnames(genes)) { - if (identical(unique(genes[[x]]), c("True", "False")) | identical(unique(genes[[x]]), c( - "False", - "True" - ))) { - genes[[x]] <- ifelse(genes[[x]] == "True", TRUE, FALSE) - } - } - for (x in colnames(proteins)) { - if (identical(unique(proteins[[x]]), c("True", "False")) | identical( - unique(proteins[[x]]), - c("False", "True") - )) { - proteins[[x]] <- ifelse(proteins[[x]] == "True", TRUE, FALSE) - } - } - for (x in colnames(interactions)) { - if (identical(unique(interactions[[x]]), c("True", "False")) | identical( - unique(interactions[[x]]), - c("False", "True") - )) { - interactions[[x]] <- ifelse(interactions[[x]] == "True", TRUE, FALSE) - } - } - if (!is.null(complexes)) { - for (x in colnames(complexes)) { - if (identical(unique(complexes[[x]]), c("True", "False")) | identical( - unique(complexes[[x]]), - c("False", "True") - )) { - complexes[[x]] <- ifelse(complexes[[x]] == "True", TRUE, FALSE) - } - } - } + genes <- conv_py_bools(genes) + proteins <- conv_py_bools(proteins) + interactions <- conv_py_bools(interactions) + complexes <- conv_py_bools(complexes) + # gene conversions if (!is.null(gene_conv) & !identical(gene_conv[1], gene_conv[2])) { # obtain conversion dictionary From bd01e018f77d4aa58d6d4a017f7247923d228078 Mon Sep 17 00:00:00 2001 From: dimalvovs Date: Wed, 13 Mar 2024 15:29:09 -0400 Subject: [PATCH 5/5] address case of character vector passed as param --- R/utils.R | 1 + tests/testthat/test-utils.R | 1 + 2 files changed, 2 insertions(+) diff --git a/R/utils.R b/R/utils.R index 6c0d4ce..6edab66 100644 --- a/R/utils.R +++ b/R/utils.R @@ -328,6 +328,7 @@ check_arg <- function(arg, allow_class = NULL, allow_len = NULL, #' @return obj Object itself in case its not a character read_if_char <- function(obj) { if (is(obj, "character")) { + check_arg(obj, allow_class = "character", allow_len = 1) obj <- read.csv(obj, stringsAsFactors = FALSE) } return(obj) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index b96dcd1..1eb2e8c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -16,6 +16,7 @@ test_that("bool conversion function works",{ test_that("read if char tries to read a file", { expect_error(read_if_char("./file_that_not_exists.csv", "cannot open the connection")) + expect_error(read_if_char(c('a', 'b')), "Length of obj must be one of: 1") }) test_that("mandatory field absence yields error, presence does not", {