From 22f551a59dce7b8ff9af17fec7b5a87b498cd8bd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 19:38:59 -0500 Subject: [PATCH 1/9] feat: create reneeDataSet class --- .pre-commit-config.yaml | 2 +- DESCRIPTION | 1 + NAMESPACE | 6 ++- R/counts.R | 25 +++++++++++ R/deseq2.R | 42 +++++++++++++------ R/metadata.R | 22 ++++++++++ R/renee-class.R | 34 +++++++++++++++ R/zzz.R | 9 ++++ data-raw/gene_counts.R | 2 +- .../RSEM.genes.expected_count.all_samples.txt | 0 inst/extdata/sample_metadata.tsv | 5 +++ man/counts_dat_to_matrix.Rd | 20 +++++++++ man/meta_tbl_to_dat.Rd | 25 +++++++++++ man/read_raw_counts.Rd | 19 --------- man/reneeDataSetFromFiles.Rd | 25 +++++++++++ man/run_deseq2.Rd | 29 +++++++++++++ 16 files changed, 232 insertions(+), 34 deletions(-) create mode 100644 R/counts.R create mode 100644 R/metadata.R create mode 100644 R/renee-class.R create mode 100644 R/zzz.R rename {data-raw => inst/extdata}/RSEM.genes.expected_count.all_samples.txt (100%) create mode 100644 inst/extdata/sample_metadata.tsv create mode 100644 man/counts_dat_to_matrix.Rd create mode 100644 man/meta_tbl_to_dat.Rd delete mode 100644 man/read_raw_counts.Rd create mode 100644 man/reneeDataSetFromFiles.Rd create mode 100644 man/run_deseq2.Rd diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9693bf4..c2a3d98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ exclude: | (?x)( ^assets/| ^docs/.*.html| - ^data-raw/*.txt| + ^inst/extdata| ^man/ ) repos: diff --git a/DESCRIPTION b/DESCRIPTION index 40c34ff..29853e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Imports: assertthat, DESeq2, dplyr, + S7, tidyr Suggests: readr, diff --git a/NAMESPACE b/NAMESPACE index 024a199..fd4a4c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,11 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(counts_dat_to_matrix) export(create_deseq_obj) export(filter_low_counts) -export(read_raw_counts) +export(meta_tbl_to_dat) +export(reneeDataSetFromFiles) +export(run_deseq2) +if (getRversion() < "4.3.0") importFrom("S7", "@") importFrom(dplyr,"%>%") diff --git a/R/counts.R b/R/counts.R new file mode 100644 index 0000000..8bb3a9b --- /dev/null +++ b/R/counts.R @@ -0,0 +1,25 @@ +#' Convert a data frame of gene counts to a matrix +#' +#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble. +#' +#' @return matrix of gene counts with rows as gene IDs +#' @export +#' +#' @examples +#' counts_dat_to_matrix(head(gene_counts)) +counts_dat_to_matrix <- function(counts_tbl) { + gene_id <- GeneName <- NULL + counts_dat <- counts_tbl %>% + # deseq2 requires integer counts + dplyr::mutate(dplyr::across( + dplyr::where(is.numeric), + \(x) as.integer(round(x, 0)) + )) %>% + as.data.frame() + row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id") + # convert counts tibble to matrix + counts_mat <- counts_dat %>% + dplyr::select(-c(gene_id, GeneName)) %>% + as.matrix() + return(counts_mat) +} diff --git a/R/deseq2.R b/R/deseq2.R index c6459e8..ccf84bf 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -16,22 +16,40 @@ #' ) #' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition) create_deseq_obj <- function(counts_tbl, meta_dat, design) { - gene_id <- GeneName <- NULL - counts_dat <- counts_tbl %>% - # deseq2 requires integer counts - dplyr::mutate(dplyr::across( - dplyr::where(is.numeric), - \(x) as.integer(round(x, 0)) - )) %>% - as.data.frame() - row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id") # convert counts tibble to matrix - counts_mat <- counts_dat %>% - dplyr::select(-c(gene_id, GeneName)) %>% - as.matrix() + counts_mat <- counts_dat_to_matrix(counts_tbl) # sample IDs must be in the same order assertthat::are_equal(colnames(counts_mat), rownames(meta_dat)) return(DESeq2::DESeqDataSetFromMatrix(counts_mat, meta_dat, design)) } + +#' Run DESeq2 on a reneeDataSet +#' +#' @param renee_ds reneeDataSet object +#' @param design model formula for experimental design. Columns must exist in `meta_dat`. +#' +#' @return reneeDataSet object with DESeq2 slot filled +#' @export +#' +#' @examples +#' rds <- reneeDataSetFromFiles( +#' system.file("extdata", +#' "RSEM.genes.expected_count.all_samples.txt", +#' package = "reneeTools" +#' ), +#' system.file("extdata", "sample_metadata.tsv", +#' package = "reneeTools" +#' ) +#' ) +#' run_deseq2(rds, ~condition) +run_deseq2 <- function(renee_ds, design) { + dds <- DESeq2::DESeqDataSetFromMatrix( + renee_ds$counts, + renee_ds$sample_meta, + design + ) + renee_ds$deseq2 <- DESeq2::DESeq(dds) + return(renee_ds) +} diff --git a/R/metadata.R b/R/metadata.R new file mode 100644 index 0000000..730edad --- /dev/null +++ b/R/metadata.R @@ -0,0 +1,22 @@ +#' Convert sample metadata from a tibble to a dataframe with sample IDs as row names +#' +#' @param meta_tbl tibble with `sample_id` column +#' +#' @return dataframe where row names are the sample IDs +#' @export +#' +#' @examples +#' sample_meta_tbl <- readr::read_tsv(system.file("extdata", +#' "sample_metadata.tsv", +#' package = "reneeTools" +#' )) +#' head(sample_meta_tbl) +#' meta_tbl_to_dat(sample_meta_tbl) +meta_tbl_to_dat <- function(meta_tbl) { + sample_id <- NULL + meta_dat <- meta_tbl %>% + as.data.frame() %>% + dplyr::select(-sample_id) + rownames(meta_dat) <- meta_tbl %>% dplyr::pull(sample_id) + return(meta_dat) +} diff --git a/R/renee-class.R b/R/renee-class.R new file mode 100644 index 0000000..aab1ac0 --- /dev/null +++ b/R/renee-class.R @@ -0,0 +1,34 @@ +reneeDataSet <- S7::new_class("renee", + parent = S7::class_list, + constructor = function(count_matrix, sample_meta_dat) { + S7::new_object(list( + counts = count_matrix, + sample_meta = sample_meta_dat + )) + } +) + +#' Create a reneeDataSet object from TSV files +#' +#' @param gene_counts_filepath path to TSV file of expected gene counts from RSEM +#' @param sample_meta_filepath path to TSV file with sample IDs and metadata for differential analysis +#' +#' @return reneeDataSet object +#' @export +#' +#' @examples +#' reneeDataSetFromFiles( +#' system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), +#' system.file("extdata", "sample_metadata.tsv", package = "reneeTools") +#' ) +reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { + count_mat <- readr::read_tsv(gene_counts_filepath) %>% + counts_dat_to_matrix() + sample_meta_dat <- readr::read_tsv(sample_meta_filepath) %>% + meta_tbl_to_dat() + + # sample IDs must be in the same order + assertthat::are_equal(colnames(count_mat), rownames(sample_meta_dat)) + + return(reneeDataSet(count_mat, sample_meta_dat)) +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..3a6f2a2 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,9 @@ +# source: https://rconsortium.github.io/S7/articles/packages.html#method-registration +.onLoad <- function(...) { + S7::methods_register() +} + +# enable usage of @name in package code +# source: https://rconsortium.github.io/S7/articles/packages.html#backward-compatibility +#' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@") +NULL diff --git a/data-raw/gene_counts.R b/data-raw/gene_counts.R index 6b68dc8..36d2942 100644 --- a/data-raw/gene_counts.R +++ b/data-raw/gene_counts.R @@ -1,3 +1,3 @@ # WT_S1.RSEM.genes.results was generated from running RENEE v2.5.3 on the test dataset https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests -gene_counts <- readr::read_tsv("data-raw/RSEM.genes.expected_count.all_samples.txt") +gene_counts <- readr::read_tsv(system.file("inst", "extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools")) usethis::use_data(gene_counts, overwrite = TRUE) diff --git a/data-raw/RSEM.genes.expected_count.all_samples.txt b/inst/extdata/RSEM.genes.expected_count.all_samples.txt similarity index 100% rename from data-raw/RSEM.genes.expected_count.all_samples.txt rename to inst/extdata/RSEM.genes.expected_count.all_samples.txt diff --git a/inst/extdata/sample_metadata.tsv b/inst/extdata/sample_metadata.tsv new file mode 100644 index 0000000..48f3de4 --- /dev/null +++ b/inst/extdata/sample_metadata.tsv @@ -0,0 +1,5 @@ +sample_id condition +KO_S3 knockout +KO_S4 knockout +WT_S1 wildtype +WT_S2 wildtype diff --git a/man/counts_dat_to_matrix.Rd b/man/counts_dat_to_matrix.Rd new file mode 100644 index 0000000..7f4917a --- /dev/null +++ b/man/counts_dat_to_matrix.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/counts.R +\name{counts_dat_to_matrix} +\alias{counts_dat_to_matrix} +\title{Convert a data frame of gene counts to a matrix} +\usage{ +counts_dat_to_matrix(counts_tbl) +} +\arguments{ +\item{counts_tbl}{expected gene counts from RSEM as a data frame or tibble.} +} +\value{ +matrix of gene counts with rows as gene IDs +} +\description{ +Convert a data frame of gene counts to a matrix +} +\examples{ +counts_dat_to_matrix(head(gene_counts)) +} diff --git a/man/meta_tbl_to_dat.Rd b/man/meta_tbl_to_dat.Rd new file mode 100644 index 0000000..2d043f5 --- /dev/null +++ b/man/meta_tbl_to_dat.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadata.R +\name{meta_tbl_to_dat} +\alias{meta_tbl_to_dat} +\title{Convert sample metadata from a tibble to a dataframe with sample IDs as row names} +\usage{ +meta_tbl_to_dat(meta_tbl) +} +\arguments{ +\item{meta_tbl}{tibble with \code{sample_id} column} +} +\value{ +dataframe where row names are the sample IDs +} +\description{ +Convert sample metadata from a tibble to a dataframe with sample IDs as row names +} +\examples{ +sample_meta_tbl <- readr::read_tsv(system.file("extdata", + "sample_metadata.tsv", + package = "reneeTools") +) +head(sample_meta_tbl) +meta_tbl_to_dat(sample_meta_tbl) +} diff --git a/man/read_raw_counts.Rd b/man/read_raw_counts.Rd deleted file mode 100644 index fb9fb09..0000000 --- a/man/read_raw_counts.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_raw_counts.R -\name{read_raw_counts} -\alias{read_raw_counts} -\title{read_raw_counts} -\usage{ -read_raw_counts(pathtofile, delimiter = "\\t") -} -\arguments{ -\item{pathtofile}{string absolute file path to raw counts matrix} - -\item{delimiter}{string default is tab} -} -\value{ -raw_counts_matrix -} -\description{ -read_raw_counts -} diff --git a/man/reneeDataSetFromFiles.Rd b/man/reneeDataSetFromFiles.Rd new file mode 100644 index 0000000..fe98dcd --- /dev/null +++ b/man/reneeDataSetFromFiles.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/renee-class.R +\name{reneeDataSetFromFiles} +\alias{reneeDataSetFromFiles} +\title{Create a reneeDataSet object from TSV files} +\usage{ +reneeDataSetFromFiles(gene_counts_filepath, sample_meta_filepath) +} +\arguments{ +\item{gene_counts_filepath}{path to TSV file of expected gene counts from RSEM} + +\item{sample_meta_filepath}{path to TSV file with sample IDs and metadata for differential analysis} +} +\value{ +reneeDataSet object +} +\description{ +Create a reneeDataSet object from TSV files +} +\examples{ +reneeDataSetFromFiles( + system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), + system.file("extdata", "sample_metadata.tsv", package = "reneeTools") +) +} diff --git a/man/run_deseq2.Rd b/man/run_deseq2.Rd new file mode 100644 index 0000000..a02842b --- /dev/null +++ b/man/run_deseq2.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deseq2.R +\name{run_deseq2} +\alias{run_deseq2} +\title{Run DESeq2 on a reneeDataSet} +\usage{ +run_deseq2(renee_ds, design) +} +\arguments{ +\item{renee_ds}{reneeDataSet object} + +\item{design}{model formula for experimental design. Columns must exist in \code{meta_dat}.} +} +\value{ +reneeDataSet object with DESeq2 slot filled +} +\description{ +Run DESeq2 on a reneeDataSet +} +\examples{ +rds <- reneeDataSetFromFiles( + system.file("extdata", + "RSEM.genes.expected_count.all_samples.txt", + package = "reneeTools"), + system.file("extdata", "sample_metadata.tsv", + package = "reneeTools") +) +run_deseq2(rds, ~condition) +} From e7cd15c35709e9a6bba120e48ba9116bd7fd8ff7 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 21:38:59 -0500 Subject: [PATCH 2/9] refactor: counts & sample meta as S7 properties all analysis objects in a list for extensibility --- R/renee-class.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/R/renee-class.R b/R/renee-class.R index aab1ac0..4296256 100644 --- a/R/renee-class.R +++ b/R/renee-class.R @@ -1,17 +1,22 @@ reneeDataSet <- S7::new_class("renee", - parent = S7::class_list, + properties = list( + counts = S7::new_S3_class("matrix"), + sample_meta = S7::class_data.frame, + analyses = S7::class_list + ), constructor = function(count_matrix, sample_meta_dat) { - S7::new_object(list( + S7::new_object(S7::S7_object(), counts = count_matrix, - sample_meta = sample_meta_dat - )) + sample_meta = sample_meta_dat, + analyses = list() + ) } ) -#' Create a reneeDataSet object from TSV files +#' Construct a reneeDataSet object from tsv files. #' -#' @param gene_counts_filepath path to TSV file of expected gene counts from RSEM -#' @param sample_meta_filepath path to TSV file with sample IDs and metadata for differential analysis +#' @param gene_counts_filepath path to tsv file of expected gene counts from RSEM. +#' @param sample_meta_filepath path to tsv file with sample IDs and metadata for differential analysis. #' #' @return reneeDataSet object #' @export From 011f0078c53da32dfc16bdc8fd2361c85f489625 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 21:39:40 -0500 Subject: [PATCH 3/9] refactor: run_deseq function on reneeDS object --- R/deseq2.R | 36 +++--------------- tests/testthat/test-deseq2.R | 74 +++++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 50 deletions(-) diff --git a/R/deseq2.R b/R/deseq2.R index ccf84bf..5a97575 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -1,34 +1,8 @@ -#' Create DESeq2 object from gene counts and sample metadata -#' -#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble. -#' @param meta_dat sample metadata as a data frame with rownames as sample IDs. -#' @param design model formula for experimental design. Columns must exist in `meta_dat`. -#' -#' @return DESeqDataSet -#' @export -#' -#' @examples -#' sample_meta <- data.frame( -#' row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), -#' condition = factor(c("knockout", "knockout", "wildtype", "wildtype"), -#' levels = c("wildtype", "knockout") -#' ) -#' ) -#' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition) -create_deseq_obj <- function(counts_tbl, meta_dat, design) { - # convert counts tibble to matrix - counts_mat <- counts_dat_to_matrix(counts_tbl) - - # sample IDs must be in the same order - assertthat::are_equal(colnames(counts_mat), rownames(meta_dat)) - - return(DESeq2::DESeqDataSetFromMatrix(counts_mat, meta_dat, design)) -} - #' Run DESeq2 on a reneeDataSet #' #' @param renee_ds reneeDataSet object #' @param design model formula for experimental design. Columns must exist in `meta_dat`. +#' @param ... remaining variables are forwarded to `DESeq2::DESeq()`. #' #' @return reneeDataSet object with DESeq2 slot filled #' @export @@ -44,12 +18,12 @@ create_deseq_obj <- function(counts_tbl, meta_dat, design) { #' ) #' ) #' run_deseq2(rds, ~condition) -run_deseq2 <- function(renee_ds, design) { +run_deseq2 <- function(renee_ds, design, ...) { dds <- DESeq2::DESeqDataSetFromMatrix( - renee_ds$counts, - renee_ds$sample_meta, + renee_ds@counts, + renee_ds@sample_meta, design ) - renee_ds$deseq2 <- DESeq2::DESeq(dds) + renee_ds@analyses$deseq2_ds <- DESeq2::DESeq(dds, ...) return(renee_ds) } diff --git a/tests/testthat/test-deseq2.R b/tests/testthat/test-deseq2.R index 321bb01..284a096 100644 --- a/tests/testthat/test-deseq2.R +++ b/tests/testthat/test-deseq2.R @@ -1,26 +1,40 @@ set.seed(20231228) -test_that("create_deseq_obj works", { - sample_meta <- - data.frame( - row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), - condition = factor( - c("knockout", "knockout", "wildtype", "wildtype"), - levels = c("wildtype", "knockout") - ) +test_that("run_deseq2 works", { + rds <- reneeDataSetFromFiles( + system.file( + "extdata", + "RSEM.genes.expected_count.all_samples.txt", + package = "reneeTools" + ), + system.file("extdata", "sample_metadata.tsv", + package = "reneeTools" ) - dds <- create_deseq_obj(gene_counts, sample_meta, ~condition) - + ) + rds@sample_meta <- rds@sample_meta %>% + dplyr::mutate(condition = factor(condition, + levels = c("wildtype", "knockout") + )) + rds <- run_deseq2(rds, design = ~condition, fitType = "local") + dds <- rds@analyses$deseq2_ds expect_equal( dds@colData %>% as.data.frame(), structure( - list(condition = structure( - c(2L, 2L, 1L, 1L), - levels = c( - "wildtype", - "knockout" + list( + condition = structure( + c(2L, 2L, 1L, 1L), + levels = c( + "wildtype", + "knockout" + ), + class = "factor" ), - class = "factor" - )), + sizeFactor = c( + 0.759835685651593, + 0.718608223926169, + 1.24466595457696, + 1.68179283050743 + ) + ), class = "data.frame", row.names = c( "KO_S3", @@ -33,9 +47,31 @@ test_that("create_deseq_obj works", { structure( list( counts.KO_S3 = c(25L, 16L, 19L), - counts.KO_S4 = c(22L, 10L, 26L), + counts.KO_S4 = c( + 22L, + 10L, 26L + ), counts.WT_S1 = c(74L, 0L, 10L), - counts.WT_S2 = c(104L, 0L, 8L) + counts.WT_S2 = c( + 104L, + 0L, 8L + ), + mu.1 = c(24.1516682192168, 13.3092987017581, 23.1890203839711), + mu.2 = c(22.8412375617529, 12.5871575689046, 21.9308214491443), + mu.3 = c(75.6181929866826, 0.158242295472865, 7.74702804399805), + mu.4 = c(102.175314069834, 0.213817014139956, 10.4677854923446), + H.1 = c(0.511844332569779, 0.504138999697003, 0.50668916671393), + H.2 = c(0.48815561528062, 0.495860729372594, 0.493310737076545), + H.3 = c(0.454896663393873, 0.499997728127273, 0.447711231007698), + H.4 = c(0.545103297274594, 0.499997728127273, 0.552288438770429), + cooks.1 = c( + 0.00787282712563543, + 0.0262422886768066, + 0.0478403297046226 + ), + cooks.2 = c(0.00740399240405431, 0.025741204681852, 0.0464156166543046), + cooks.3 = c(0.00250160501520332, 0.127533456822227, 0.0779681548830785), + cooks.4 = c(0.00307311736197857, 0.161328557073165, 0.100411826759898) ), class = "data.frame", row.names = c( From 36effdea47d946a091ce59702acc919b1f3010aa Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 21:56:49 -0500 Subject: [PATCH 4/9] refactor: don't use assertthat it hasn't been updated in 4 years :'( --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 29853e6..97a201c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,6 @@ BugReports: https://github.com/CCBR/reneeTools/issues Depends: R (>= 2.10) Imports: - assertthat, DESeq2, dplyr, S7, From 70a06a6d614d4e05225fbcd2198dab3a8c5b969b Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 21:59:45 -0500 Subject: [PATCH 5/9] feat: reneeDataSetFromDataFrames() --- R/renee-class.R | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/R/renee-class.R b/R/renee-class.R index 4296256..1d21eef 100644 --- a/R/renee-class.R +++ b/R/renee-class.R @@ -27,13 +27,38 @@ reneeDataSet <- S7::new_class("renee", #' system.file("extdata", "sample_metadata.tsv", package = "reneeTools") #' ) reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { - count_mat <- readr::read_tsv(gene_counts_filepath) %>% - counts_dat_to_matrix() - sample_meta_dat <- readr::read_tsv(sample_meta_filepath) %>% - meta_tbl_to_dat() + count_dat <- readr::read_tsv(gene_counts_filepath) + sample_meta_dat <- readr::read_tsv(sample_meta_filepath) + return(reneeDataSetFromDataFrames) +} + +#' Construct a reneeDataSet object from data frames +#' +#' @param gene_counts_dat expected gene counts from RSEM as a data frame or tibble. +#' Must contain a `gene_id` column and a column for each sample ID in the metadata. +#' @param sample_meta_dat sample metadata as a data frame or tibble. +#' Must contain a `sample_ID` column. +#' +#' @return reneeDataSet object +#' @export +#' +#' @examples +#' sample_meta <- data.frame( +#' sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), +#' condition = factor( +#' c("knockout", "knockout", "wildtype", "wildtype"), +#' levels = c("wildtype", "knockout") +#' ) +#' ) +#' reneeDataSetFromDataFrames(gene_counts, sample_meta) +reneeDataSetFromDataFrames <- function(gene_counts_dat, sample_meta_dat) { + count_mat <- gene_counts_dat %>% counts_dat_to_matrix() + sample_meta_dat <- sample_meta_dat %>% meta_tbl_to_dat() # sample IDs must be in the same order - assertthat::are_equal(colnames(count_mat), rownames(sample_meta_dat)) + if (!all(colnames(count_mat) == rownames(sample_meta_dat))) { + stop("Not all columns in the count matrix equal the rows in the sample metadata. Sample IDs must be in the same order.") + } return(reneeDataSet(count_mat, sample_meta_dat)) } From fe5baa67894304d0fe64ec2012a548fcad7afc0a Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 22:00:04 -0500 Subject: [PATCH 6/9] test: reneeDataSet constructors --- tests/testthat/test-deseq2.R | 8 +++---- tests/testthat/test-renee-class.R | 39 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-renee-class.R diff --git a/tests/testthat/test-deseq2.R b/tests/testthat/test-deseq2.R index 284a096..c62e2cb 100644 --- a/tests/testthat/test-deseq2.R +++ b/tests/testthat/test-deseq2.R @@ -1,6 +1,6 @@ set.seed(20231228) test_that("run_deseq2 works", { - rds <- reneeDataSetFromFiles( + renee_ds <- reneeDataSetFromFiles( system.file( "extdata", "RSEM.genes.expected_count.all_samples.txt", @@ -10,12 +10,12 @@ test_that("run_deseq2 works", { package = "reneeTools" ) ) - rds@sample_meta <- rds@sample_meta %>% + renee_ds@sample_meta <- renee_ds@sample_meta %>% dplyr::mutate(condition = factor(condition, levels = c("wildtype", "knockout") )) - rds <- run_deseq2(rds, design = ~condition, fitType = "local") - dds <- rds@analyses$deseq2_ds + renee_ds <- run_deseq2(renee_ds, design = ~condition, fitType = "local") + dds <- renee_ds@analyses$deseq2_ds expect_equal( dds@colData %>% as.data.frame(), structure( diff --git a/tests/testthat/test-renee-class.R b/tests/testthat/test-renee-class.R new file mode 100644 index 0000000..9193620 --- /dev/null +++ b/tests/testthat/test-renee-class.R @@ -0,0 +1,39 @@ +test_that("reneeDataSetFromFiles works", { + rds <- reneeDataSetFromFiles( + system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), + system.file("extdata", "sample_metadata.tsv", package = "reneeTools") + ) + expect_equal( + rds@counts %>% head(), + structure(c( + 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, + 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L + ), dim = c(6L, 4L), dimnames = list( + c( + "ENSG00000121410.11", "ENSG00000268895.5", "ENSG00000148584.15", + "ENSG00000175899.14", "ENSG00000245105.3", "ENSG00000166535.20" + ), c("KO_S3", "KO_S4", "WT_S1", "WT_S2") + )) + ) + expect_equal( + rds@sample_meta, + structure(list(condition = c( + "knockout", "knockout", "wildtype", + "wildtype" + )), row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), class = "data.frame") + ) +}) + +test_that("reneeDataSetFromDataFrames detect problems", { + sample_meta <- data.frame( + sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), + condition = factor( + c("knockout", "knockout", "wildtype", "wildtype"), + levels = c("wildtype", "knockout") + ) + ) + expect_error( + reneeDataSetFromDataFrames(gene_counts[, 1:4], sample_meta), + "Not all columns" + ) +}) From ab1eb892bc6d1bf49e892ac728fda4b2b5fd9d62 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 22:03:22 -0500 Subject: [PATCH 7/9] fix: typo --- R/renee-class.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/renee-class.R b/R/renee-class.R index 1d21eef..f908a68 100644 --- a/R/renee-class.R +++ b/R/renee-class.R @@ -29,7 +29,7 @@ reneeDataSet <- S7::new_class("renee", reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { count_dat <- readr::read_tsv(gene_counts_filepath) sample_meta_dat <- readr::read_tsv(sample_meta_filepath) - return(reneeDataSetFromDataFrames) + return(reneeDataSetFromDataFrames(count_dat, sample_meta_dat)) } #' Construct a reneeDataSet object from data frames From 93fc2c7f1c0725a74bd41d9b8b0a4b4a00245967 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 22:03:30 -0500 Subject: [PATCH 8/9] chore: document() --- NAMESPACE | 2 +- man/create_deseq_obj.Rd | 30 ------------------------------ man/meta_tbl_to_dat.Rd | 6 +++--- man/reneeDataSetFromDataFrames.Rd | 31 +++++++++++++++++++++++++++++++ man/reneeDataSetFromFiles.Rd | 8 ++++---- man/run_deseq2.Rd | 12 ++++++++---- 6 files changed, 47 insertions(+), 42 deletions(-) delete mode 100644 man/create_deseq_obj.Rd create mode 100644 man/reneeDataSetFromDataFrames.Rd diff --git a/NAMESPACE b/NAMESPACE index fd4a4c4..fedbab0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,9 @@ export("%>%") export(counts_dat_to_matrix) -export(create_deseq_obj) export(filter_low_counts) export(meta_tbl_to_dat) +export(reneeDataSetFromDataFrames) export(reneeDataSetFromFiles) export(run_deseq2) if (getRversion() < "4.3.0") importFrom("S7", "@") diff --git a/man/create_deseq_obj.Rd b/man/create_deseq_obj.Rd deleted file mode 100644 index e0b3d83..0000000 --- a/man/create_deseq_obj.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deseq2.R -\name{create_deseq_obj} -\alias{create_deseq_obj} -\title{Create DESeq2 object from gene counts and sample metadata} -\usage{ -create_deseq_obj(counts_tbl, meta_dat, design) -} -\arguments{ -\item{counts_tbl}{expected gene counts from RSEM as a data frame or tibble.} - -\item{meta_dat}{sample metadata as a data frame with rownames as sample IDs.} - -\item{design}{model formula for experimental design. Columns must exist in \code{meta_dat}.} -} -\value{ -DESeqDataSet -} -\description{ -Create DESeq2 object from gene counts and sample metadata -} -\examples{ -sample_meta <- data.frame( - row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), - condition = factor(c("knockout", "knockout", "wildtype", "wildtype"), - levels = c("wildtype", "knockout") - ) -) -dds <- create_deseq_obj(gene_counts, sample_meta, ~condition) -} diff --git a/man/meta_tbl_to_dat.Rd b/man/meta_tbl_to_dat.Rd index 2d043f5..162cbd7 100644 --- a/man/meta_tbl_to_dat.Rd +++ b/man/meta_tbl_to_dat.Rd @@ -17,9 +17,9 @@ Convert sample metadata from a tibble to a dataframe with sample IDs as row name } \examples{ sample_meta_tbl <- readr::read_tsv(system.file("extdata", - "sample_metadata.tsv", - package = "reneeTools") -) + "sample_metadata.tsv", + package = "reneeTools" +)) head(sample_meta_tbl) meta_tbl_to_dat(sample_meta_tbl) } diff --git a/man/reneeDataSetFromDataFrames.Rd b/man/reneeDataSetFromDataFrames.Rd new file mode 100644 index 0000000..f2f959d --- /dev/null +++ b/man/reneeDataSetFromDataFrames.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/renee-class.R +\name{reneeDataSetFromDataFrames} +\alias{reneeDataSetFromDataFrames} +\title{Construct a reneeDataSet object from data frames} +\usage{ +reneeDataSetFromDataFrames(gene_counts_dat, sample_meta_dat) +} +\arguments{ +\item{gene_counts_dat}{expected gene counts from RSEM as a data frame or tibble. +Must contain a \code{gene_id} column and a column for each sample ID in the metadata.} + +\item{sample_meta_dat}{sample metadata as a data frame or tibble. +Must contain a \code{sample_ID} column.} +} +\value{ +reneeDataSet object +} +\description{ +Construct a reneeDataSet object from data frames +} +\examples{ +sample_meta <- data.frame( + sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), + condition = factor( + c("knockout", "knockout", "wildtype", "wildtype"), + levels = c("wildtype", "knockout") + ) +) +reneeDataSetFromDataFrames(gene_counts, sample_meta) +} diff --git a/man/reneeDataSetFromFiles.Rd b/man/reneeDataSetFromFiles.Rd index fe98dcd..5ecffe1 100644 --- a/man/reneeDataSetFromFiles.Rd +++ b/man/reneeDataSetFromFiles.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/renee-class.R \name{reneeDataSetFromFiles} \alias{reneeDataSetFromFiles} -\title{Create a reneeDataSet object from TSV files} +\title{Construct a reneeDataSet object from tsv files.} \usage{ reneeDataSetFromFiles(gene_counts_filepath, sample_meta_filepath) } \arguments{ -\item{gene_counts_filepath}{path to TSV file of expected gene counts from RSEM} +\item{gene_counts_filepath}{path to tsv file of expected gene counts from RSEM.} -\item{sample_meta_filepath}{path to TSV file with sample IDs and metadata for differential analysis} +\item{sample_meta_filepath}{path to tsv file with sample IDs and metadata for differential analysis.} } \value{ reneeDataSet object } \description{ -Create a reneeDataSet object from TSV files +Construct a reneeDataSet object from tsv files. } \examples{ reneeDataSetFromFiles( diff --git a/man/run_deseq2.Rd b/man/run_deseq2.Rd index a02842b..8dd04b8 100644 --- a/man/run_deseq2.Rd +++ b/man/run_deseq2.Rd @@ -4,12 +4,14 @@ \alias{run_deseq2} \title{Run DESeq2 on a reneeDataSet} \usage{ -run_deseq2(renee_ds, design) +run_deseq2(renee_ds, design, ...) } \arguments{ \item{renee_ds}{reneeDataSet object} \item{design}{model formula for experimental design. Columns must exist in \code{meta_dat}.} + +\item{...}{remaining variables are forwarded to \code{DESeq2::DESeq()}.} } \value{ reneeDataSet object with DESeq2 slot filled @@ -20,10 +22,12 @@ Run DESeq2 on a reneeDataSet \examples{ rds <- reneeDataSetFromFiles( system.file("extdata", - "RSEM.genes.expected_count.all_samples.txt", - package = "reneeTools"), + "RSEM.genes.expected_count.all_samples.txt", + package = "reneeTools" + ), system.file("extdata", "sample_metadata.tsv", - package = "reneeTools") + package = "reneeTools" + ) ) run_deseq2(rds, ~condition) } From b0739bc93bfd7bebdf6ea48364346bae8b74d1b9 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 28 Dec 2023 22:13:41 -0500 Subject: [PATCH 9/9] chore: update news --- NEWS.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index cd5e27e..fc8ce76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,10 @@ - Create a `NEWS.md` file to track changes to the package. -## New functions +## Main functions & classes +- `reneeDataSet` (#16) + - `reneeDataSetFromFiles()` (#16) + - `reneeDataSetFromDataFrames()` (#16) + - `run_deseq2()` (#16) - `filter_low_counts()` (#10) -- `create_deseq_obj()` (#13)