From feb1d5492dd7cd14e457c2b79fa34a7d54a6a272 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 14:40:24 -0400 Subject: [PATCH 01/11] refactor: rename constructor functions per tidy style --- R/deseq2.R | 2 +- R/renee-class.R | 10 +++++----- ...rames.Rd => create_reneeDataSet_from_dataframes.Rd} | 8 ++++---- ...tFromFiles.Rd => create_reneeDataSet_from_files.Rd} | 8 ++++---- tests/testthat/test-deseq2.R | 2 +- tests/testthat/test-renee-class.R | 8 ++++---- 6 files changed, 19 insertions(+), 19 deletions(-) rename man/{reneeDataSetFromDataFrames.Rd => create_reneeDataSet_from_dataframes.Rd} (77%) rename man/{reneeDataSetFromFiles.Rd => create_reneeDataSet_from_files.Rd} (77%) diff --git a/R/deseq2.R b/R/deseq2.R index 4c4654c..c19f3d5 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -8,7 +8,7 @@ #' @export #' #' @examples -#' renee_ds <- reneeDataSetFromFiles( +#' renee_ds <- create_reneeDataSet_from_files( #' system.file("extdata", #' "RSEM.genes.expected_count.all_samples.txt", #' package = "reneeTools" diff --git a/R/renee-class.R b/R/renee-class.R index f908a68..f1db8ae 100644 --- a/R/renee-class.R +++ b/R/renee-class.R @@ -22,14 +22,14 @@ reneeDataSet <- S7::new_class("renee", #' @export #' #' @examples -#' reneeDataSetFromFiles( +#' create_reneeDataSet_from_files( #' system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), #' system.file("extdata", "sample_metadata.tsv", package = "reneeTools") #' ) -reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { +create_reneeDataSet_from_files <- function(gene_counts_filepath, sample_meta_filepath) { count_dat <- readr::read_tsv(gene_counts_filepath) sample_meta_dat <- readr::read_tsv(sample_meta_filepath) - return(reneeDataSetFromDataFrames(count_dat, sample_meta_dat)) + return(create_reneeDataSet_from_dataframes(count_dat, sample_meta_dat)) } #' Construct a reneeDataSet object from data frames @@ -50,8 +50,8 @@ reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { #' levels = c("wildtype", "knockout") #' ) #' ) -#' reneeDataSetFromDataFrames(gene_counts, sample_meta) -reneeDataSetFromDataFrames <- function(gene_counts_dat, sample_meta_dat) { +#' create_reneeDataSet_from_dataframes(gene_counts, sample_meta) +create_reneeDataSet_from_dataframes <- function(gene_counts_dat, sample_meta_dat) { count_mat <- gene_counts_dat %>% counts_dat_to_matrix() sample_meta_dat <- sample_meta_dat %>% meta_tbl_to_dat() diff --git a/man/reneeDataSetFromDataFrames.Rd b/man/create_reneeDataSet_from_dataframes.Rd similarity index 77% rename from man/reneeDataSetFromDataFrames.Rd rename to man/create_reneeDataSet_from_dataframes.Rd index f2f959d..66cc725 100644 --- a/man/reneeDataSetFromDataFrames.Rd +++ b/man/create_reneeDataSet_from_dataframes.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/renee-class.R -\name{reneeDataSetFromDataFrames} -\alias{reneeDataSetFromDataFrames} +\name{create_reneeDataSet_from_dataframes} +\alias{create_reneeDataSet_from_dataframes} \title{Construct a reneeDataSet object from data frames} \usage{ -reneeDataSetFromDataFrames(gene_counts_dat, sample_meta_dat) +create_reneeDataSet_from_dataframes(gene_counts_dat, sample_meta_dat) } \arguments{ \item{gene_counts_dat}{expected gene counts from RSEM as a data frame or tibble. @@ -27,5 +27,5 @@ sample_meta <- data.frame( levels = c("wildtype", "knockout") ) ) -reneeDataSetFromDataFrames(gene_counts, sample_meta) +create_reneeDataSet_from_dataframes(gene_counts, sample_meta) } diff --git a/man/reneeDataSetFromFiles.Rd b/man/create_reneeDataSet_from_files.Rd similarity index 77% rename from man/reneeDataSetFromFiles.Rd rename to man/create_reneeDataSet_from_files.Rd index 5ecffe1..faf93e9 100644 --- a/man/reneeDataSetFromFiles.Rd +++ b/man/create_reneeDataSet_from_files.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/renee-class.R -\name{reneeDataSetFromFiles} -\alias{reneeDataSetFromFiles} +\name{create_reneeDataSet_from_files} +\alias{create_reneeDataSet_from_files} \title{Construct a reneeDataSet object from tsv files.} \usage{ -reneeDataSetFromFiles(gene_counts_filepath, sample_meta_filepath) +create_reneeDataSet_from_files(gene_counts_filepath, sample_meta_filepath) } \arguments{ \item{gene_counts_filepath}{path to tsv file of expected gene counts from RSEM.} @@ -18,7 +18,7 @@ reneeDataSet object Construct a reneeDataSet object from tsv files. } \examples{ -reneeDataSetFromFiles( +create_reneeDataSet_from_files( system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), system.file("extdata", "sample_metadata.tsv", package = "reneeTools") ) diff --git a/tests/testthat/test-deseq2.R b/tests/testthat/test-deseq2.R index c62e2cb..31ae0e5 100644 --- a/tests/testthat/test-deseq2.R +++ b/tests/testthat/test-deseq2.R @@ -1,6 +1,6 @@ set.seed(20231228) test_that("run_deseq2 works", { - renee_ds <- reneeDataSetFromFiles( + renee_ds <- create_reneeDataSet_from_files( system.file( "extdata", "RSEM.genes.expected_count.all_samples.txt", diff --git a/tests/testthat/test-renee-class.R b/tests/testthat/test-renee-class.R index 9193620..d8f75d6 100644 --- a/tests/testthat/test-renee-class.R +++ b/tests/testthat/test-renee-class.R @@ -1,5 +1,5 @@ -test_that("reneeDataSetFromFiles works", { - rds <- reneeDataSetFromFiles( +test_that("reneeDataSet from files works", { + rds <- create_reneeDataSet_from_files( system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), system.file("extdata", "sample_metadata.tsv", package = "reneeTools") ) @@ -24,7 +24,7 @@ test_that("reneeDataSetFromFiles works", { ) }) -test_that("reneeDataSetFromDataFrames detect problems", { +test_that("reneeDataSet from data frames detect problems", { sample_meta <- data.frame( sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), condition = factor( @@ -33,7 +33,7 @@ test_that("reneeDataSetFromDataFrames detect problems", { ) ) expect_error( - reneeDataSetFromDataFrames(gene_counts[, 1:4], sample_meta), + create_reneeDataSet_from_dataframes(gene_counts[, 1:4], sample_meta), "Not all columns" ) }) From fe20f268c62dfaeecf4b6c9ebc5fac483453540b Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 15:25:27 -0400 Subject: [PATCH 02/11] fix: ensure renee class is loaded first --- R/{renee-class.R => 0_renee-class.R} | 2 ++ 1 file changed, 2 insertions(+) rename R/{renee-class.R => 0_renee-class.R} (98%) diff --git a/R/renee-class.R b/R/0_renee-class.R similarity index 98% rename from R/renee-class.R rename to R/0_renee-class.R index f1db8ae..dd9723b 100644 --- a/R/renee-class.R +++ b/R/0_renee-class.R @@ -1,3 +1,5 @@ +#' RENEE data set class +#' @export reneeDataSet <- S7::new_class("renee", properties = list( counts = S7::new_S3_class("matrix"), From 72b3da2ac50140f8182ba8030c5d061c45623c03 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 15:45:20 -0400 Subject: [PATCH 03/11] feat: prefilter counts for deseq --- NAMESPACE | 5 ++- R/0_renee-class.R | 11 +++--- R/counts.R | 2 +- R/deseq2.R | 13 +++++-- R/filter_low_counts.R | 6 +-- man/counts_dat_to_matrix.Rd | 2 +- man/create_reneeDataSet_from_dataframes.Rd | 10 ++--- man/create_reneeDataSet_from_files.Rd | 2 +- man/filter_low_counts.Rd | 4 +- man/reneeDataSet.Rd | 11 ++++++ man/run_deseq2.Rd | 2 +- ...est-renee-class.R => test-0_renee-class.R} | 31 ++++++++++----- tests/testthat/test-deseq2.R | 39 +++++++++---------- tests/testthat/test-filter_low_counts.R | 6 +-- tests/testthat/test-read_raw_counts.R | 3 -- 15 files changed, 85 insertions(+), 62 deletions(-) create mode 100644 man/reneeDataSet.Rd rename tests/testthat/{test-renee-class.R => test-0_renee-class.R} (60%) delete mode 100644 tests/testthat/test-read_raw_counts.R diff --git a/NAMESPACE b/NAMESPACE index fedbab0..c5e0739 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,10 +2,11 @@ export("%>%") export(counts_dat_to_matrix) +export(create_reneeDataSet_from_dataframes) +export(create_reneeDataSet_from_files) export(filter_low_counts) export(meta_tbl_to_dat) -export(reneeDataSetFromDataFrames) -export(reneeDataSetFromFiles) +export(reneeDataSet) export(run_deseq2) if (getRversion() < "4.3.0") importFrom("S7", "@") importFrom(dplyr,"%>%") diff --git a/R/0_renee-class.R b/R/0_renee-class.R index dd9723b..9e5074c 100644 --- a/R/0_renee-class.R +++ b/R/0_renee-class.R @@ -2,7 +2,7 @@ #' @export reneeDataSet <- S7::new_class("renee", properties = list( - counts = S7::new_S3_class("matrix"), + counts = S7::class_data.frame, sample_meta = S7::class_data.frame, analyses = S7::class_list ), @@ -53,14 +53,13 @@ create_reneeDataSet_from_files <- function(gene_counts_filepath, sample_meta_fil #' ) #' ) #' create_reneeDataSet_from_dataframes(gene_counts, sample_meta) -create_reneeDataSet_from_dataframes <- function(gene_counts_dat, sample_meta_dat) { - count_mat <- gene_counts_dat %>% counts_dat_to_matrix() +create_reneeDataSet_from_dataframes <- function(count_dat, sample_meta_dat) { sample_meta_dat <- sample_meta_dat %>% meta_tbl_to_dat() # sample IDs must be in the same order - if (!all(colnames(count_mat) == rownames(sample_meta_dat))) { - stop("Not all columns in the count matrix equal the rows in the sample metadata. Sample IDs must be in the same order.") + if (!all(colnames(count_dat %>% select(-gene_id, -GeneName)) == rownames(sample_meta_dat))) { + stop("Not all columns in the count data equal the rows in the sample metadata. Sample IDs must be in the same order.") } - return(reneeDataSet(count_mat, sample_meta_dat)) + return(reneeDataSet(count_dat, sample_meta_dat)) } diff --git a/R/counts.R b/R/counts.R index 8bb3a9b..a7d3c0f 100644 --- a/R/counts.R +++ b/R/counts.R @@ -7,7 +7,7 @@ #' #' @examples #' counts_dat_to_matrix(head(gene_counts)) -counts_dat_to_matrix <- function(counts_tbl) { +counts_dat_to_matrix <- function(counts_tbl, gene_id_colname = "gene_id") { gene_id <- GeneName <- NULL counts_dat <- counts_tbl %>% # deseq2 requires integer counts diff --git a/R/deseq2.R b/R/deseq2.R index c19f3d5..2600f15 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -18,12 +18,17 @@ #' ) #' ) #' renee_ds <- run_deseq2(renee_ds, ~condition) -run_deseq2 <- function(renee_ds, design, ...) { +run_deseq2 <- S7::new_generic("run_deseq2", "renee_ds", function(renee_ds, design, ...) { + S7::S7_dispatch() +}) + +S7::method(run_deseq2, reneeDataSet) <- function(renee_ds, design, min_count = 10, ...) { dds <- DESeq2::DESeqDataSetFromMatrix( - renee_ds@counts, - renee_ds@sample_meta, - design + countData = renee_ds@counts %>% filter_low_counts(min_count = min_count) %>% counts_dat_to_matrix(), + colData = renee_ds@sample_meta, + design = design ) renee_ds@analyses$deseq2_ds <- DESeq2::DESeq(dds, ...) + renee_ds@analyses$deseq2_results <- DESeq2::results(renee_ds@analyses$deseq2_ds) return(renee_ds) } diff --git a/R/filter_low_counts.R b/R/filter_low_counts.R index 6d32e54..f505cd0 100644 --- a/R/filter_low_counts.R +++ b/R/filter_low_counts.R @@ -1,7 +1,7 @@ #' filter_low_counts #' #' @param counts_dat expected gene counts from RSEM as a data frame or tibble -#' @param min_counts integer number of minimum counts across all samples (default: 0) +#' @param min_count integer number of minimum counts across all samples (default: 0) #' #' @return filtered counts dataframe #' @export @@ -11,7 +11,7 @@ #' filter_low_counts(gene_counts, min_counts = 100) filter_low_counts <- function( counts_dat, - min_counts = 0) { + min_count = 0) { gene_id <- count <- count_sum <- NULL genes_above_threshold <- counts_dat %>% tidyr::pivot_longer(!c("gene_id", "GeneName"), @@ -19,7 +19,7 @@ filter_low_counts <- function( ) %>% dplyr::group_by(gene_id) %>% dplyr::summarize(count_sum = sum(count)) %>% - dplyr::filter(count_sum >= min_counts) %>% + dplyr::filter(count_sum >= min_count) %>% dplyr::pull(gene_id) return( counts_dat %>% diff --git a/man/counts_dat_to_matrix.Rd b/man/counts_dat_to_matrix.Rd index 7f4917a..4ad4922 100644 --- a/man/counts_dat_to_matrix.Rd +++ b/man/counts_dat_to_matrix.Rd @@ -4,7 +4,7 @@ \alias{counts_dat_to_matrix} \title{Convert a data frame of gene counts to a matrix} \usage{ -counts_dat_to_matrix(counts_tbl) +counts_dat_to_matrix(counts_tbl, gene_id_colname = "gene_id") } \arguments{ \item{counts_tbl}{expected gene counts from RSEM as a data frame or tibble.} diff --git a/man/create_reneeDataSet_from_dataframes.Rd b/man/create_reneeDataSet_from_dataframes.Rd index 66cc725..83dabd9 100644 --- a/man/create_reneeDataSet_from_dataframes.Rd +++ b/man/create_reneeDataSet_from_dataframes.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/renee-class.R +% Please edit documentation in R/0_renee-class.R \name{create_reneeDataSet_from_dataframes} \alias{create_reneeDataSet_from_dataframes} \title{Construct a reneeDataSet object from data frames} \usage{ -create_reneeDataSet_from_dataframes(gene_counts_dat, sample_meta_dat) +create_reneeDataSet_from_dataframes(count_dat, sample_meta_dat) } \arguments{ -\item{gene_counts_dat}{expected gene counts from RSEM as a data frame or tibble. -Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} - \item{sample_meta_dat}{sample metadata as a data frame or tibble. Must contain a \code{sample_ID} column.} + +\item{gene_counts_dat}{expected gene counts from RSEM as a data frame or tibble. +Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} } \value{ reneeDataSet object diff --git a/man/create_reneeDataSet_from_files.Rd b/man/create_reneeDataSet_from_files.Rd index faf93e9..c5d3595 100644 --- a/man/create_reneeDataSet_from_files.Rd +++ b/man/create_reneeDataSet_from_files.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/renee-class.R +% Please edit documentation in R/0_renee-class.R \name{create_reneeDataSet_from_files} \alias{create_reneeDataSet_from_files} \title{Construct a reneeDataSet object from tsv files.} diff --git a/man/filter_low_counts.Rd b/man/filter_low_counts.Rd index d83cdc4..3a9bf79 100644 --- a/man/filter_low_counts.Rd +++ b/man/filter_low_counts.Rd @@ -4,12 +4,12 @@ \alias{filter_low_counts} \title{filter_low_counts} \usage{ -filter_low_counts(counts_dat, min_counts = 0) +filter_low_counts(counts_dat, min_count = 0) } \arguments{ \item{counts_dat}{expected gene counts from RSEM as a data frame or tibble} -\item{min_counts}{integer number of minimum counts across all samples (default: 0)} +\item{min_count}{integer number of minimum counts across all samples (default: 0)} } \value{ filtered counts dataframe diff --git a/man/reneeDataSet.Rd b/man/reneeDataSet.Rd new file mode 100644 index 0000000..a1d7d10 --- /dev/null +++ b/man/reneeDataSet.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/0_renee-class.R +\name{reneeDataSet} +\alias{reneeDataSet} +\title{RENEE data set class} +\usage{ +reneeDataSet(count_matrix, sample_meta_dat) +} +\description{ +RENEE data set class +} diff --git a/man/run_deseq2.Rd b/man/run_deseq2.Rd index 39732a4..8752bfe 100644 --- a/man/run_deseq2.Rd +++ b/man/run_deseq2.Rd @@ -20,7 +20,7 @@ reneeDataSet object with DESeq2 slot filled Run DESeq2 on a reneeDataSet } \examples{ -renee_ds <- reneeDataSetFromFiles( +renee_ds <- create_reneeDataSet_from_files( system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools" diff --git a/tests/testthat/test-renee-class.R b/tests/testthat/test-0_renee-class.R similarity index 60% rename from tests/testthat/test-renee-class.R rename to tests/testthat/test-0_renee-class.R index d8f75d6..6454aa9 100644 --- a/tests/testthat/test-renee-class.R +++ b/tests/testthat/test-0_renee-class.R @@ -5,15 +5,28 @@ test_that("reneeDataSet from files works", { ) expect_equal( rds@counts %>% head(), - structure(c( - 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, - 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L - ), dim = c(6L, 4L), dimnames = list( - c( - "ENSG00000121410.11", "ENSG00000268895.5", "ENSG00000148584.15", - "ENSG00000175899.14", "ENSG00000245105.3", "ENSG00000166535.20" - ), c("KO_S3", "KO_S4", "WT_S1", "WT_S2") - )) + structure( + list( + gene_id = c( + "ENSG00000121410.11", + "ENSG00000268895.5", + "ENSG00000148584.15", + "ENSG00000175899.14", + "ENSG00000245105.3", + "ENSG00000166535.20" + ), + GeneName = c( + "A1BG", "A1BG-AS1", "A1CF", + "A2M", "A2M-AS1", "A2ML1" + ), + KO_S3 = c(0, 0, 0, 0, 0, 0), + KO_S4 = c(0, 0, 0, 0, 0, 0), + WT_S1 = c(0, 0, 0, 0, 0, 0), + WT_S2 = c(0, 0, 0, 0, 0, 0) + ), + row.names = c(NA, -6L), + class = c("tbl_df", "tbl", "data.frame") + ) ) expect_equal( rds@sample_meta, diff --git a/tests/testthat/test-deseq2.R b/tests/testthat/test-deseq2.R index 31ae0e5..c61c4cc 100644 --- a/tests/testthat/test-deseq2.R +++ b/tests/testthat/test-deseq2.R @@ -14,7 +14,8 @@ test_that("run_deseq2 works", { dplyr::mutate(condition = factor(condition, levels = c("wildtype", "knockout") )) - renee_ds <- run_deseq2(renee_ds, design = ~condition, fitType = "local") + renee_ds <- + run_deseq2(renee_ds, design = ~condition, fitType = "local") dds <- renee_ds@analyses$deseq2_ds expect_equal( dds@colData %>% as.data.frame(), @@ -29,10 +30,10 @@ test_that("run_deseq2 works", { class = "factor" ), sizeFactor = c( - 0.759835685651593, - 0.718608223926169, - 1.24466595457696, - 1.68179283050743 + 0.739974960000608, + 0.717118872451827, + 1.34164078649987, + 1.69303431346171 ) ), class = "data.frame", @@ -56,22 +57,18 @@ test_that("run_deseq2 works", { 104L, 0L, 8L ), - mu.1 = c(24.1516682192168, 13.3092987017581, 23.1890203839711), - mu.2 = c(22.8412375617529, 12.5871575689046, 21.9308214491443), - mu.3 = c(75.6181929866826, 0.158242295472865, 7.74702804399805), - mu.4 = c(102.175314069834, 0.213817014139956, 10.4677854923446), - H.1 = c(0.511844332569779, 0.504138999697003, 0.50668916671393), - H.2 = c(0.48815561528062, 0.495860729372594, 0.493310737076545), - H.3 = c(0.454896663393873, 0.499997728127273, 0.447711231007698), - H.4 = c(0.545103297274594, 0.499997728127273, 0.552288438770429), - cooks.1 = c( - 0.00787282712563543, - 0.0262422886768066, - 0.0478403297046226 - ), - cooks.2 = c(0.00740399240405431, 0.025741204681852, 0.0464156166543046), - cooks.3 = c(0.00250160501520332, 0.127533456822227, 0.0779681548830785), - cooks.4 = c(0.00307311736197857, 0.161328557073165, 0.100411826759898) + mu.1 = c(23.8682703018296, 13.1709993621292, 22.8847421174049), + mu.2 = c(23.131035523431, 12.7641781441173, 22.1778862132993), + mu.3 = c(78.660598086526, 0.163744933328453, 8.02129153499216), + mu.4 = c(99.2628526338571, 0.20663190443383, 10.1221742389411), + H.1 = c(0.507689382133648, 0.502052019764842, 0.503779946658291), + H.2 = c(0.492310572696009, 0.497947673454118, 0.496219957488281), + H.3 = c(0.446124908477968, 0.499997692127505, 0.459429993613616), + H.4 = c(0.553875066333655, 0.499997692127505, 0.540569676700414), + cooks.1 = c(0.0157542703049353, 0.0289351707384892, 0.0400561954057841), + cooks.2 = c(0.0151418002975461, 0.0286548510838955, 0.0393806789741918), + cooks.3 = c(0.0212216674791077, 0.130797161405866, 0.058560061396526), + cooks.4 = c(0.026938300726748, 0.156792088506772, 0.0711568961752663) ), class = "data.frame", row.names = c( diff --git a/tests/testthat/test-filter_low_counts.R b/tests/testthat/test-filter_low_counts.R index 441d338..f744c95 100644 --- a/tests/testthat/test-filter_low_counts.R +++ b/tests/testthat/test-filter_low_counts.R @@ -8,7 +8,7 @@ test_that("filter_low_counts works", { ) expect_equal(filter_low_counts(test_dat), test_dat) expect_equal( - filter_low_counts(test_dat, min_counts = 1), + filter_low_counts(test_dat, min_count = 1), data.frame( gene_id = c("B", "C"), GeneName = c("geneB", "geneC"), @@ -16,14 +16,14 @@ test_that("filter_low_counts works", { ) ) expect_equal( - filter_low_counts(test_dat, min_counts = 2), + filter_low_counts(test_dat, min_count = 2), data.frame( gene_id = "C", GeneName = "geneC", s1 = 0, s2 = 0, s3 = 3 ) ) expect_equal( - filter_low_counts(test_dat, min_counts = 5), + filter_low_counts(test_dat, min_count = 5), data.frame( gene_id = character(0), GeneName = character(0), diff --git a/tests/testthat/test-read_raw_counts.R b/tests/testthat/test-read_raw_counts.R deleted file mode 100644 index 4e547c4..0000000 --- a/tests/testthat/test-read_raw_counts.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) # TODO write real tests -}) From 437551f1782ed70c8c17a68f82cfef93b4aca3e3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 15:47:29 -0400 Subject: [PATCH 04/11] feat: setup infra for intro vignette --- .gitignore | 1 + DESCRIPTION | 6 +++++- vignettes/.gitignore | 2 ++ vignettes/intro.Rmd | 29 +++++++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/intro.Rmd diff --git a/.gitignore b/.gitignore index 457525e..8a6f2db 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .DS_Store .quarto docs +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index f3896e3..0aab490 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,13 +21,17 @@ Imports: S7, tidyr Suggests: + knitr, readr, + rmarkdown, styler, testthat (>= 3.0.0), usethis +VignetteBuilder: + knitr biocViews: Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd new file mode 100644 index 0000000..b146553 --- /dev/null +++ b/vignettes/intro.Rmd @@ -0,0 +1,29 @@ +--- +title: "intro" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{intro} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(reneeTools) +library(dplyr) +# replace this line with actual path to your gene counts +gene_counts_tsv <- system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools") +metadata_tsv <- system.file("extdata", "sample_metadata.tsv", package = "reneeTools") + +# create reneeDataSet object +renee_ds <- create_reneeDataSet_from_files(gene_counts_tsv, metadata_tsv) %>% + run_deseq2(design = ~condition) + +renee_ds@analyses$deseq2_results %>% head() +``` From 572df28e8db07e900ce219b33bcc9b724e771134 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 19:54:19 +0000 Subject: [PATCH 05/11] chore: update CITATION.cff --- CITATION.cff | 67 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 6609958..8811f15 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,7 +1,7 @@ -# ----------------------------------------------------------- -# CITATION file created with {cffr} R package, v0.5.0 +# -------------------------------------------- +# CITATION file created with {cffr} R package # See also: https://docs.ropensci.org/cffr/ -# ----------------------------------------------------------- +# -------------------------------------------- cff-version: 1.2.0 message: 'To cite package "reneeTools" in publications use:' @@ -33,11 +33,10 @@ references: url: https://www.R-project.org/ authors: - name: R Core Team - location: - name: Vienna, Austria - year: "2024" institution: name: R Foundation for Statistical Computing + address: Vienna, Austria + year: "2024" version: ">= 2.10" - type: software title: DESeq2 @@ -56,7 +55,6 @@ references: - family-names: Huber given-names: Wolfgang year: "2024" - doi: 10.18129/B9.bioc.DESeq2 - type: software title: dplyr abstract: "dplyr: A Grammar of Data Manipulation" @@ -125,6 +123,18 @@ references: - family-names: Girlich given-names: Maximilian year: "2024" + - type: software + title: knitr + abstract: "knitr: A General-Purpose Package for Dynamic Report Generation in R" + notes: Suggests + url: https://yihui.org/knitr/ + repository: https://CRAN.R-project.org/package=knitr + authors: + - family-names: Xie + given-names: Yihui + email: xie@yihui.name + orcid: https://orcid.org/0000-0003-0645-5666 + year: "2024" - type: software title: readr abstract: "readr: Read Rectangular Text Data" @@ -142,6 +152,49 @@ references: email: jenny@posit.co orcid: https://orcid.org/0000-0002-6983-2759 year: "2024" + - type: software + title: rmarkdown + abstract: "rmarkdown: Dynamic Documents for R" + notes: Suggests + url: https://pkgs.rstudio.com/rmarkdown/ + repository: https://CRAN.R-project.org/package=rmarkdown + authors: + - family-names: Allaire + given-names: JJ + email: jj@posit.co + - family-names: Xie + given-names: Yihui + email: xie@yihui.name + orcid: https://orcid.org/0000-0003-0645-5666 + - family-names: Dervieux + given-names: Christophe + email: cderv@posit.co + orcid: https://orcid.org/0000-0003-4474-2498 + - family-names: McPherson + given-names: Jonathan + email: jonathan@posit.co + - family-names: Luraschi + given-names: Javier + - family-names: Ushey + given-names: Kevin + email: kevin@posit.co + - family-names: Atkins + given-names: Aron + email: aron@posit.co + - family-names: Wickham + given-names: Hadley + email: hadley@posit.co + - family-names: Cheng + given-names: Joe + email: joe@posit.co + - family-names: Chang + given-names: Winston + email: winston@posit.co + - family-names: Iannone + given-names: Richard + email: rich@posit.co + orcid: https://orcid.org/0000-0003-3925-190X + year: "2024" - type: software title: styler abstract: "styler: Non-Invasive Pretty Printing of R Code" From ae613df112b2a3ddfb004969e8b44238c2b3f8fa Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 15:55:30 -0400 Subject: [PATCH 06/11] docs: update news --- NEWS.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4b57317..f73a0ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,8 +6,8 @@ This is the first release of reneeTools 🎉 ## Main functions & classes -- `reneeDataSet` (#16) - - `reneeDataSetFromFiles()` (#16) - - `reneeDataSetFromDataFrames()` (#16) - - `run_deseq2()` (#16) -- `filter_low_counts()` (#10) +- `reneeDataSet` + - `create_reneeDataSet_from_files()` + - `create_reneeDataSet_from_dataframes()` + - `run_deseq2()` +- `filter_low_counts()` From b466b50139dc9794613b1b9aa8ef076ba43f912d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 15:58:53 -0400 Subject: [PATCH 07/11] fix: R check warnings for global vars --- R/0_renee-class.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/0_renee-class.R b/R/0_renee-class.R index 9e5074c..0d69288 100644 --- a/R/0_renee-class.R +++ b/R/0_renee-class.R @@ -54,10 +54,12 @@ create_reneeDataSet_from_files <- function(gene_counts_filepath, sample_meta_fil #' ) #' create_reneeDataSet_from_dataframes(gene_counts, sample_meta) create_reneeDataSet_from_dataframes <- function(count_dat, sample_meta_dat) { + gene_id <- GeneName <- NULL + sample_meta_dat <- sample_meta_dat %>% meta_tbl_to_dat() # sample IDs must be in the same order - if (!all(colnames(count_dat %>% select(-gene_id, -GeneName)) == rownames(sample_meta_dat))) { + if (!all(colnames(count_dat %>% dplyr::select(-gene_id, -GeneName)) == rownames(sample_meta_dat))) { stop("Not all columns in the count data equal the rows in the sample metadata. Sample IDs must be in the same order.") } From b9e9467e74f687534ad1987c30c5c3f9ef831acf Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 17:52:53 -0400 Subject: [PATCH 08/11] docs: streamline param inheritance --- R/0_renee-class.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/0_renee-class.R b/R/0_renee-class.R index 0d69288..2d2dedd 100644 --- a/R/0_renee-class.R +++ b/R/0_renee-class.R @@ -1,14 +1,20 @@ -#' RENEE data set class -#' @export +#' reneeDataSet class +#' +#' @param count_dat expected gene counts from RSEM as a data frame or tibble. +#' Must contain a `gene_id` column and a column for each sample ID in the metadata. +#' @param sample_meta_dat sample metadata as a data frame or tibble. +#' Must contain a `sample_ID` column. +#' +#' reneeDataSet <- S7::new_class("renee", properties = list( counts = S7::class_data.frame, sample_meta = S7::class_data.frame, analyses = S7::class_list ), - constructor = function(count_matrix, sample_meta_dat) { + constructor = function(count_dat, sample_meta_dat) { S7::new_object(S7::S7_object(), - counts = count_matrix, + counts = count_dat, sample_meta = sample_meta_dat, analyses = list() ) @@ -36,10 +42,7 @@ create_reneeDataSet_from_files <- function(gene_counts_filepath, sample_meta_fil #' Construct a reneeDataSet object from data frames #' -#' @param gene_counts_dat expected gene counts from RSEM as a data frame or tibble. -#' Must contain a `gene_id` column and a column for each sample ID in the metadata. -#' @param sample_meta_dat sample metadata as a data frame or tibble. -#' Must contain a `sample_ID` column. +#' @inheritParams reneeDataSet #' #' @return reneeDataSet object #' @export From 9d1e633026df0825cc933dc9b556cd544d7b2fe1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 17:54:48 -0400 Subject: [PATCH 09/11] fix: silence R CMD Check warning about unused imports DESeq2 is used in an S7 method. It seems R CMD Check did not recognize it in that context? --- R/zzz.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 3a6f2a2..d01a195 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -7,3 +7,9 @@ # source: https://rconsortium.github.io/S7/articles/packages.html#backward-compatibility #' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@") NULL + + +# Suppress R CMD check note 'All declared Imports should be used'. +# These packages are used within S7 methods. +#' @importFrom DESeq2 DESeq +NULL From 12132add6708517812ac015f09faeff61e3e5772 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 17:55:24 -0400 Subject: [PATCH 10/11] docs: update params & man/ --- NAMESPACE | 2 +- R/counts.R | 2 +- R/filter_low_counts.R | 2 +- man/counts_dat_to_matrix.Rd | 2 +- man/create_reneeDataSet_from_dataframes.Rd | 6 +++--- man/filter_low_counts.Rd | 2 +- man/reneeDataSet.Rd | 13 ++++++++++--- 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c5e0739..c6031cb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,7 @@ export(create_reneeDataSet_from_dataframes) export(create_reneeDataSet_from_files) export(filter_low_counts) export(meta_tbl_to_dat) -export(reneeDataSet) export(run_deseq2) if (getRversion() < "4.3.0") importFrom("S7", "@") +importFrom(DESeq2,DESeq) importFrom(dplyr,"%>%") diff --git a/R/counts.R b/R/counts.R index a7d3c0f..8bb3a9b 100644 --- a/R/counts.R +++ b/R/counts.R @@ -7,7 +7,7 @@ #' #' @examples #' counts_dat_to_matrix(head(gene_counts)) -counts_dat_to_matrix <- function(counts_tbl, gene_id_colname = "gene_id") { +counts_dat_to_matrix <- function(counts_tbl) { gene_id <- GeneName <- NULL counts_dat <- counts_tbl %>% # deseq2 requires integer counts diff --git a/R/filter_low_counts.R b/R/filter_low_counts.R index f505cd0..7a90855 100644 --- a/R/filter_low_counts.R +++ b/R/filter_low_counts.R @@ -8,7 +8,7 @@ #' #' @examples #' filter_low_counts(gene_counts) %>% head() -#' filter_low_counts(gene_counts, min_counts = 100) +#' filter_low_counts(gene_counts, min_count = 100) filter_low_counts <- function( counts_dat, min_count = 0) { diff --git a/man/counts_dat_to_matrix.Rd b/man/counts_dat_to_matrix.Rd index 4ad4922..7f4917a 100644 --- a/man/counts_dat_to_matrix.Rd +++ b/man/counts_dat_to_matrix.Rd @@ -4,7 +4,7 @@ \alias{counts_dat_to_matrix} \title{Convert a data frame of gene counts to a matrix} \usage{ -counts_dat_to_matrix(counts_tbl, gene_id_colname = "gene_id") +counts_dat_to_matrix(counts_tbl) } \arguments{ \item{counts_tbl}{expected gene counts from RSEM as a data frame or tibble.} diff --git a/man/create_reneeDataSet_from_dataframes.Rd b/man/create_reneeDataSet_from_dataframes.Rd index 83dabd9..e38d248 100644 --- a/man/create_reneeDataSet_from_dataframes.Rd +++ b/man/create_reneeDataSet_from_dataframes.Rd @@ -7,11 +7,11 @@ create_reneeDataSet_from_dataframes(count_dat, sample_meta_dat) } \arguments{ +\item{count_dat}{expected gene counts from RSEM as a data frame or tibble. +Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} + \item{sample_meta_dat}{sample metadata as a data frame or tibble. Must contain a \code{sample_ID} column.} - -\item{gene_counts_dat}{expected gene counts from RSEM as a data frame or tibble. -Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} } \value{ reneeDataSet object diff --git a/man/filter_low_counts.Rd b/man/filter_low_counts.Rd index 3a9bf79..c4bd1cd 100644 --- a/man/filter_low_counts.Rd +++ b/man/filter_low_counts.Rd @@ -19,5 +19,5 @@ filter_low_counts } \examples{ filter_low_counts(gene_counts) \%>\% head() -filter_low_counts(gene_counts, min_counts = 100) +filter_low_counts(gene_counts, min_count = 100) } diff --git a/man/reneeDataSet.Rd b/man/reneeDataSet.Rd index a1d7d10..f319ab0 100644 --- a/man/reneeDataSet.Rd +++ b/man/reneeDataSet.Rd @@ -2,10 +2,17 @@ % Please edit documentation in R/0_renee-class.R \name{reneeDataSet} \alias{reneeDataSet} -\title{RENEE data set class} +\title{reneeDataSet class} \usage{ -reneeDataSet(count_matrix, sample_meta_dat) +reneeDataSet(count_dat, sample_meta_dat) +} +\arguments{ +\item{count_dat}{expected gene counts from RSEM as a data frame or tibble. +Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} + +\item{sample_meta_dat}{sample metadata as a data frame or tibble. +Must contain a \code{sample_ID} column.} } \description{ -RENEE data set class +reneeDataSet class } From d750c21967fbacdea6dde7bcafb929ede71aac8e Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 23 Apr 2024 17:55:46 -0400 Subject: [PATCH 11/11] docs: add dev version number to news --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index f73a0ea..5870ad3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# reneeTools development version +# reneeTools development version 0.0.0.9000 This is the first release of reneeTools 🎉 @@ -6,8 +6,8 @@ This is the first release of reneeTools 🎉 ## Main functions & classes -- `reneeDataSet` +- `reneeDataSet` (#16, #28, @kelly-sovacool) - `create_reneeDataSet_from_files()` - `create_reneeDataSet_from_dataframes()` - `run_deseq2()` -- `filter_low_counts()` +- `filter_low_counts()` (#10, @kelly-sovacool)