From 79d1fdd82914b8ebfd5f469cc80e2443d169bd47 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 19 Nov 2024 10:40:48 -0500 Subject: [PATCH 1/5] refactor: rename package from reneeTools to MOSuite BREAKING CHANGE: all instances of reneeTools are now 'MOSuite', reneeDataSet is multiOmicDataSet, and renee_ds is moo (multi-omic object) --- .github/CONTRIBUTING.md | 18 +++---- .github/ISSUE_TEMPLATE/bug_report.yml | 6 +-- .github/ISSUE_TEMPLATE/config.yml | 2 +- .github/workflows/docker.yml | 2 +- CITATION.cff | 8 +-- DESCRIPTION | 9 ++-- Dockerfile | 4 +- reneeTools.Rproj => MOSuite.Rproj | 0 NAMESPACE | 4 +- NEWS.md | 13 +++-- R/{0_renee-class.R => 0_mo-class.R} | 46 +++++++++--------- R/{reneeTools-package.R => MOSuite-package.R} | 4 +- R/counts.R | 18 +++---- R/deseq2.R | 32 ++++++------ R/filter.R | 24 ++++----- R/metadata.R | 4 +- README.Rmd | 21 ++++---- README.md | 22 +++++---- _pkgdown.yml | 2 +- data-raw/gene_counts.R | 2 +- data-raw/nidap.R | 6 +-- ...neeTools-package.Rd => MOSuite-package.Rd} | 18 +++---- man/calc_cpm.Rd | 14 +++--- ...reate_multiOmicDataSet_from_dataframes.Rd} | 16 +++--- ... => create_multiOmicDataSet_from_files.Rd} | 24 ++++----- ...elopment-plan.png => development-plan.png} | Bin man/filter_counts.Rd | 16 +++--- man/meta_tbl_to_dat.Rd | 2 +- man/{reneeDataSet.Rd => multiOmicDataSet.Rd} | 12 ++--- man/run_deseq2.Rd | 18 +++---- tests/testthat.R | 4 +- tests/testthat/test-0_renee-class.R | 12 ++--- tests/testthat/test-counts.R | 6 +-- tests/testthat/test-deseq2.R | 22 ++++----- tests/testthat/test-filter.R | 12 ++--- vignettes/intro.Rmd | 14 +++--- vignettes/memory.Rmd | 32 ++++++------ 37 files changed, 238 insertions(+), 231 deletions(-) rename reneeTools.Rproj => MOSuite.Rproj (100%) rename R/{0_renee-class.R => 0_mo-class.R} (72%) rename R/{reneeTools-package.R => MOSuite-package.R} (57%) rename man/{reneeTools-package.Rd => MOSuite-package.Rd} (61%) rename man/{create_reneeDataSet_from_dataframes.Rd => create_multiOmicDataSet_from_dataframes.Rd} (67%) rename man/{create_reneeDataSet_from_files.Rd => create_multiOmicDataSet_from_files.Rd} (64%) rename man/figures/{reneeTools-development-plan.png => development-plan.png} (100%) rename man/{reneeDataSet.Rd => multiOmicDataSet.Rd} (67%) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 80756a4..25553bf 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to reneeTools +# Contributing to MOSuite ## Proposing changes with issues @@ -15,7 +15,7 @@ so others will know you're working on it. We use [GitHub Flow](https://docs.github.com/en/get-started/using-github/github-flow) as our collaboration process. Follow the steps below for detailed instructions on contributing changes to -reneeTools. +MOSuite. ![GitHub Flow diagram](./img/GitHub-Flow_bg-white.png) @@ -28,10 +28,10 @@ Otherwise, you will first need to the repo and clone your fork. You only need to do this step once. ```sh -git clone https://github.com/CCBR/reneeTools +git clone https://github.com/CCBR/MOSuite ``` -> Cloning into 'reneeTools'...
+> Cloning into 'MOSuite'...
> remote: Enumerating objects: 1136, done.
> remote: Counting objects: 100% (463/463), done.
> remote: Compressing objects: 100% (357/357), done.
@@ -40,7 +40,7 @@ git clone https://github.com/CCBR/reneeTools > Resolving deltas: 100% (530/530), done.
```sh -cd reneeTools +cd MOSuite ``` ### If this is your first time cloning the repo, install dependencies @@ -204,22 +204,22 @@ git push --set-upstream origin iss-10 > remote: Resolving deltas: 100% (3/3), completed with 3 local objects.
> remote:
> remote: Create a pull request for 'iss-10' on GitHub by visiting:
-> remote: https://github.com/CCBR/reneeTools/pull/new/iss-10
+> remote: https://github.com/CCBR/MOSuite/pull/new/iss-10
> remote:
-> To https://github.com/CCBR/reneeTools
+> To https://github.com/CCBR/MOSuite
>
> [new branch] iss-10 -> iss-10
> branch 'iss-10' set up to track 'origin/iss-10'.
We recommend pushing your commits often so they will be backed up on GitHub. You can view the files in your branch on GitHub at -`https://github.com/CCBR/reneeTools/tree/` +`https://github.com/CCBR/MOSuite/tree/` (replace `` with the actual name of your branch). ### Create the PR Once your branch is ready, create a PR on GitHub: - + Select the branch you just pushed: diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index d594e1d..13b961e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -5,7 +5,7 @@ body: - type: markdown attributes: value: | - Before you submit this issue, please check the documentation: + Before you submit this issue, please check the documentation: - type: textarea id: description @@ -22,7 +22,7 @@ body: description: Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading . render: console placeholder: | - library(reneeTools) + library(MOSuite) ... insert_your_code_here() ... Paste some output where something broke @@ -40,6 +40,6 @@ body: label: System information description: | * Version of R - * Version of CCBR/reneeTools + * Version of CCBR/MOSuite * OS _(eg. Ubuntu Linux, macOS)_ * Hardware _(eg. HPC, Desktop)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index cc3283a..d1dde83 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ contact_links: - name: Discussions - url: https://github.com/CCBR/reneeTools/discussions + url: https://github.com/CCBR/MOSuite/discussions about: Please ask and answer questions here. diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 1b45654..ca3fa9b 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -6,7 +6,7 @@ on: types: [published] env: - IMAGE_NAME: "reneeTools" + IMAGE_NAME: "MOSuite" CONTEXT: "./" NAMESPACE: "nciccbr" diff --git a/CITATION.cff b/CITATION.cff index 5564143..4d30967 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,10 +4,10 @@ # -------------------------------------------- cff-version: 1.2.0 -message: 'To cite package "reneeTools" in publications use:' +message: 'To cite package "MOSuite" in publications use:' type: software license: MIT -title: 'reneeTools: Helper functions for RENEE' +title: 'MOSuite: Helper functions for RENEE' version: 0.0.0.9000 abstract: TODO What the package does (one paragraph). authors: @@ -23,8 +23,8 @@ authors: given-names: Samantha email: samantha.chill@nih.gov orcid: https://orcid.org/0000-0002-8734-9875 -repository-code: https://github.com/CCBR/reneeTools -url: https://ccbr.github.io/reneeTools/ +repository-code: https://github.com/CCBR/MOSuite +url: https://ccbr.github.io/MOSuite/ contact: - family-names: Koparde given-names: Vishal diff --git a/DESCRIPTION b/DESCRIPTION index 81cf032..b934b81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ -Package: reneeTools -Title: Helper functions for RENEE +Package: MOSuite +Title: R package for downstream multi-omics analysis Version: 0.0.0.9000 Authors@R: c( person("Vishal", "Koparde", , "vishal.koparde@nih.gov", role = c("aut", "cre"), @@ -12,9 +12,8 @@ Authors@R: c( ) Description: TODO What the package does (one paragraph). License: MIT + file LICENSE -URL: https://github.com/CCBR/reneeTools, - https://ccbr.github.io/reneeTools/ -BugReports: https://github.com/CCBR/reneeTools/issues +URL: https://github.com/CCBR/MOSuite, https://ccbr.github.io/MOSuite/ +BugReports: https://github.com/CCBR/MOSuite/issues Depends: R (>= 2.10) Imports: diff --git a/Dockerfile b/Dockerfile index 5ae1bd6..2cb62f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -91,8 +91,8 @@ RUN mamba install -c conda-forge \ r-devtools # install R package -COPY . /opt2/reneeTools -RUN R -e "devtools::install_local('/opt2/reneeTools', dependencies = TRUE)" +COPY . /opt2/MOSuite +RUN R -e "devtools::install_local('/opt2/MOSuite', dependencies = TRUE)" # Save Dockerfile in the docker COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} diff --git a/reneeTools.Rproj b/MOSuite.Rproj similarity index 100% rename from reneeTools.Rproj rename to MOSuite.Rproj diff --git a/NAMESPACE b/NAMESPACE index 3e8830d..c247775 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,8 +2,8 @@ export("%>%") export(calc_cpm) -export(create_reneeDataSet_from_dataframes) -export(create_reneeDataSet_from_files) +export(create_multiOmicDataSet_from_dataframes) +export(create_multiOmicDataSet_from_files) export(filter_counts) export(meta_tbl_to_dat) export(run_deseq2) diff --git a/NEWS.md b/NEWS.md index a7baf64..bcbabc2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,17 @@ -# reneeTools development version 0.0.0.9000 +# MOSuite development version 0.0.0.9000 -This is the first release of reneeTools 🎉 +This is the first release of MOSuite 🎉 - Create a `NEWS.md` file to track changes to the package. ## Main functions & classes -- `reneeDataSet` (#16, #28, @kelly-sovacool) - - `create_reneeDataSet_from_files()` - - `create_reneeDataSet_from_dataframes()` +- `multiOmicDataSet` (#16, #28, @kelly-sovacool) + - `create_multiOmicDataSet_from_files()` + - `create_multiOmicDataSet_from_dataframes()` - `run_deseq2()` - `calc_cpm()` (#38, @kelly-sovacool) - `filter_counts()` (#38, @kelly-sovacool) + +Note: at the start of development, this package was called reneeTools. +Later it was renamed to MOSuite. () diff --git a/R/0_renee-class.R b/R/0_mo-class.R similarity index 72% rename from R/0_renee-class.R rename to R/0_mo-class.R index d26e8d2..e759227 100644 --- a/R/0_renee-class.R +++ b/R/0_mo-class.R @@ -1,11 +1,11 @@ -#' reneeDataSet class +#' multiOmicDataSet class #' #' @param sample_meta_dat sample metadata as a data frame or tibble. #' Must contain a `sample_id` column. #' @param counts_lst named list of dataframes containing counts, e.g. expected gene counts from RSEM. Each data frame is expected to contain a `gene_id` column and a column for each sample ID in the metadata. #' #' -reneeDataSet <- S7::new_class("renee", +multiOmicDataSet <- S7::new_class("multiOmicDataSet", properties = list( sample_meta = S7::class_data.frame, counts = S7::class_list, # list of data frames @@ -29,35 +29,35 @@ reneeDataSet <- S7::new_class("renee", } ) -#' Construct a reneeDataSet object from tsv files. +#' Construct a multiOmicDataSet object from tsv files. #' #' @param sample_meta_filepath path to tsv file with sample IDs and metadata for differential analysis. #' @param gene_counts_filepath path to tsv file of expected gene counts from RSEM. #' @param count_type type to assign the values of `gene_counts_filepath` to in the `counts` slot #' @param sample_id_colname name of the column in `sample_meta_filepath` that contains the sample IDs #' -#' @return reneeDataSet object +#' @return multiOmicDataSet object #' @export #' #' @examples -#' renee_ds <- create_reneeDataSet_from_files( +#' moo <- create_multiOmicDataSet_from_files( #' sample_meta_filepath = system.file("extdata", #' "sample_metadata.tsv.gz", -#' package = "reneeTools" +#' package = "MOSuite" #' ), #' gene_counts_filepath = system.file("extdata", #' "RSEM.genes.expected_count.all_samples.txt.gz", -#' package = "reneeTools" +#' package = "MOSuite" #' ) #' ) -#' renee_ds@counts$raw %>% head() -#' renee_ds@sample_meta -create_reneeDataSet_from_files <- function(sample_meta_filepath, gene_counts_filepath, - count_type = "raw", - sample_id_colname = "sample_id") { +#' moo@counts$raw %>% head() +#' moo@sample_meta +create_multiOmicDataSet_from_files <- function(sample_meta_filepath, gene_counts_filepath, + count_type = "raw", + sample_id_colname = "sample_id") { count_dat <- readr::read_tsv(gene_counts_filepath) sample_meta_dat <- readr::read_tsv(sample_meta_filepath) - return(create_reneeDataSet_from_dataframes( + return(create_multiOmicDataSet_from_dataframes( sample_meta_dat = sample_meta_dat, count_dat = count_dat, count_type = "raw", @@ -65,13 +65,13 @@ create_reneeDataSet_from_files <- function(sample_meta_filepath, gene_counts_fil )) } -#' Construct a reneeDataSet object from data frames +#' Construct a multiOmicDataSet object from data frames #' -#' @inheritParams reneeDataSet -#' @inheritParams create_reneeDataSet_from_files +#' @inheritParams multiOmicDataSet +#' @inheritParams create_multiOmicDataSet_from_files #' @param count_dat data frame of feature counts (e.g. expected gene counts from RSEM) #' -#' @return reneeDataSet object +#' @return multiOmicDataSet object #' @export #' #' @examples @@ -82,11 +82,11 @@ create_reneeDataSet_from_files <- function(sample_meta_filepath, gene_counts_fil #' levels = c("wildtype", "knockout") #' ) #' ) -#' create_reneeDataSet_from_dataframes(sample_meta, gene_counts) -create_reneeDataSet_from_dataframes <- function(sample_meta_dat, - count_dat, - sample_id_colname = "sample_id", - count_type = "raw") { +#' create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts) +create_multiOmicDataSet_from_dataframes <- function(sample_meta_dat, + count_dat, + sample_id_colname = "sample_id", + count_type = "raw") { gene_columns <- c("gene_id", "GeneName", "Gene") # sample IDs must be in the same order gene_sample_colnames <- count_dat %>% @@ -109,5 +109,5 @@ create_reneeDataSet_from_dataframes <- function(sample_meta_dat, counts <- list() counts[[count_type]] <- count_dat - return(reneeDataSet(sample_meta_dat, counts)) + return(multiOmicDataSet(sample_meta_dat, counts)) } diff --git a/R/reneeTools-package.R b/R/MOSuite-package.R similarity index 57% rename from R/reneeTools-package.R rename to R/MOSuite-package.R index 7afc33b..ffff6ec 100644 --- a/R/reneeTools-package.R +++ b/R/MOSuite-package.R @@ -1,6 +1,6 @@ -#' reneeTools: R helper functions for RENEE +#' MOSuite: R helper functions for RENEE #' -#' `reneeTools` implements helper functions for RENEE, a comprehensive +#' `MOSuite` implements helper functions for RENEE, a comprehensive #' quality-control and quantification RNA-seq pipeline #' #' @keywords internal diff --git a/R/counts.R b/R/counts.R index 42892c8..96aec45 100644 --- a/R/counts.R +++ b/R/counts.R @@ -1,9 +1,9 @@ -#' Calculate counts-per-million (CPM) on raw counts in a reneeDataSet +#' Calculate counts-per-million (CPM) on raw counts in a multiOmicDataSet #' -#' @param renee_ds reneeDataSet object +#' @param moo multiOmicDataSet object #' @param ... additional arguments to pass to edgeR::cpm() #' -#' @return reneeDataSet with cpm-transformed counts +#' @return multiOmicDataSet with cpm-transformed counts #' @export #' #' @examples @@ -14,17 +14,17 @@ #' levels = c("wildtype", "knockout") #' ) #' ) -#' renee_ds <- create_reneeDataSet_from_dataframes(sample_meta, gene_counts) %>% +#' moo <- create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts) %>% #' calc_cpm() -#' head(renee_ds@counts$cpm) -calc_cpm <- S7::new_generic("calc_cpm", "renee_ds", function(renee_ds, ...) { +#' head(moo@counts$cpm) +calc_cpm <- S7::new_generic("calc_cpm", "moo", function(moo, ...) { S7::S7_dispatch() }) -S7::method(calc_cpm, reneeDataSet) <- function(renee_ds, gene_colname = "gene_id", ...) { - renee_ds@counts$cpm <- renee_ds@counts$raw %>% +S7::method(calc_cpm, multiOmicDataSet) <- function(moo, gene_colname = "gene_id", ...) { + moo@counts$cpm <- moo@counts$raw %>% calc_cpm_df(gene_colname = gene_colname) - return(renee_ds) + return(moo) } #' Calculate CPM on a data frame diff --git a/R/deseq2.R b/R/deseq2.R index 5dac0dc..ab85399 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -1,41 +1,41 @@ -#' Run DESeq2 on a reneeDataSet +#' Run DESeq2 on a multiOmicDataSet #' -#' @param renee_ds reneeDataSet object +#' @param moo multiOmicDataSet object #' @param design model formula for experimental design. Columns must exist in `meta_dat`. #' @param ... remaining variables are forwarded to `DESeq2::DESeq()`. #' -#' @return reneeDataSet object with DESeq2 slot filled +#' @return multiOmicDataSet object with DESeq2 slot filled #' @export #' #' @examples #' \dontrun{ -#' renee_ds <- create_reneeDataSet_from_files( +#' moo <- create_multiOmicDataSet_from_files( #' system.file("extdata", "sample_metadata.tsv.gz", -#' package = "reneeTools" +#' package = "MOSuite" #' ), #' system.file("extdata", #' "RSEM.genes.expected_count.all_samples.txt.gz", -#' package = "reneeTools" +#' package = "MOSuite" #' ) #' ) %>% filter_counts() -#' renee_ds <- run_deseq2(renee_ds, ~condition) +#' moo <- run_deseq2(moo, ~condition) #' } -run_deseq2 <- S7::new_generic("run_deseq2", "renee_ds", function(renee_ds, design, ...) { +run_deseq2 <- S7::new_generic("run_deseq2", "moo", function(moo, design, ...) { S7::S7_dispatch() }) -S7::method(run_deseq2, reneeDataSet) <- function(renee_ds, design, gene_colname = "gene_id", min_count = 10, ...) { - if (is.null(renee_ds@counts$filt)) { - stop("renee_ds must contain filtered counts for DESeq2. Hint: Did you forget to run filter_counts()?") +S7::method(run_deseq2, multiOmicDataSet) <- function(moo, design, gene_colname = "gene_id", min_count = 10, ...) { + if (is.null(moo@counts$filt)) { + stop("moo must contain filtered counts for DESeq2. Hint: Did you forget to run filter_counts()?") } dds <- DESeq2::DESeqDataSetFromMatrix( - countData = renee_ds@counts$filt %>% + countData = moo@counts$filt %>% dplyr::mutate(dplyr::across(dplyr::where(is.numeric), round)) %>% # DESeq2 requires integer counts counts_dat_to_matrix(gene_colname = gene_colname), - colData = renee_ds@sample_meta, + colData = moo@sample_meta, design = design ) - renee_ds@analyses$deseq2_ds <- DESeq2::DESeq(dds, ...) - renee_ds@analyses$deseq2_results <- DESeq2::results(renee_ds@analyses$deseq2_ds) - return(renee_ds) + moo@analyses$deseq2_ds <- DESeq2::DESeq(dds, ...) + moo@analyses$deseq2_results <- DESeq2::results(moo@analyses$deseq2_ds) + return(moo) } diff --git a/R/filter.R b/R/filter.R index e4b4965..5cd13bf 100644 --- a/R/filter.R +++ b/R/filter.R @@ -3,8 +3,8 @@ #' This is often the first step in the QC portion of an analysis to filter out #' features that have very low raw counts across most or all of your samples. #' -#' This function takes a reneeDataSet containing raw counts and a sample -#' metadata table, and returns the reneeDataSet object with filtered counts. +#' This function takes a multiOmicDataSet containing raw counts and a sample +#' metadata table, and returns the multiOmicDataSet object with filtered counts. #' It also produces an image consisting of three QC plots. #' #' You can tune the threshold for tuning how low counts for a given gene are @@ -19,8 +19,8 @@ #' another based on unsupervised clustering. #' #' -#' @param renee_ds reneeDataSet object (see `create_reneeDataSet_from_dataframes()`) -#' @param count_type the type of counts to use -- must be a name in the counts slot (`renee_ds@counts`) +#' @param moo multiOmicDataSet object (see `create_multiOmicDataSet_from_dataframes()`) +#' @param count_type the type of counts to use -- must be a name in the counts slot (`moo@counts`) #' @param gene_names_column The column from your input Counts Matrix containing the Feature IDs (Usually Gene or Protein ID). This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts Matrix will be available to select for this parameter. #' @param sample_names_column The column from your input Sample Metadata table containing the sample names. The names in this column must exactly match the names used as the sample column names of your input Counts Matrix. Only columns of Text type from your input Sample Metadata table will be available to select for this parameter. #' @param group_column The column from your input Sample Metadata table containing the sample group information. This is usually a column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal, Before, After, etc.). Only columns of Text type from your input Sample Metadata will be available to select for this parameter. @@ -54,11 +54,11 @@ #' @param interactive_plots set to TRUE to make PCA and Histogram plots interactive with `plotly`, allowing you to hover your mouse over a point or line to view sample information. The similarity heat map will not display if this toggle is set to TRUE. Default is FALSE. #' @param plot_correlation_matrix_heatmap Data sets with a large number of samples may be too large to create a correlation matrix heat map. If this template takes longer than 5 minutes to run, Toggle switch to FALSE and the correlation matrix will not be be created. Default is TRUE. #' -#' @return `reneeDataSet` with filtered counts +#' @return `multiOmicDataSet` with filtered counts #' @export #' #' @examples -#' renee_ds <- create_reneeDataSet_from_dataframes( +#' moo <- create_multiOmicDataSet_from_dataframes( #' as.data.frame(nidap_sample_metadata), #' as.data.frame(nidap_clean_raw_counts), #' sample_id_colname = "Sample" @@ -68,9 +68,9 @@ #' sample_names_column = "Sample", #' gene_names_column = "Gene" #' ) -#' head(renee_ds@counts$filt) +#' head(moo@counts$filt) #' -filter_counts <- function(renee_ds, +filter_counts <- function(moo, count_type = "raw", gene_names_column = "gene_id", sample_names_column = "sample_id", @@ -117,8 +117,8 @@ filter_counts <- function(renee_ds, interactive_plots = FALSE, plot_correlation_matrix_heatmap = TRUE, make_plots = TRUE) { - counts_matrix <- renee_ds@counts[[count_type]] %>% as.data.frame() # currently, this function requires data frames - sample_metadata <- renee_ds@sample_meta %>% as.data.frame() + counts_matrix <- moo@counts[[count_type]] %>% as.data.frame() # currently, this function requires data frames + sample_metadata <- moo@sample_meta %>% as.data.frame() # TODO we should use "feature" instead of "gene" to make sure this is applicable beyond RNA-seq # TODO: just have users specify hex values directly for simplicity @@ -304,9 +304,9 @@ filter_counts <- function(renee_ds, df.final <- merge(anno_tbl, df.final, by = gene_names_column, all.y = T) df.final[, gene_names_column] <- gsub("_[0-9]+$", "", df.final[, gene_names_column]) - renee_ds@counts[["filt"]] <- df.final + moo@counts[["filt"]] <- df.final - return(renee_ds) + return(moo) } #' Remove low-count genes diff --git a/R/metadata.R b/R/metadata.R index b73f791..e6ffcc3 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -2,7 +2,7 @@ #' #' @param meta_tbl tibble with `sample_id` column #' -#' @inheritParams create_reneeDataSet_from_files +#' @inheritParams create_multiOmicDataSet_from_files #' #' @return dataframe where row names are the sample IDs #' @export @@ -10,7 +10,7 @@ #' @examples #' sample_meta_tbl <- readr::read_tsv(system.file("extdata", #' "sample_metadata.tsv.gz", -#' package = "reneeTools" +#' package = "MOSuite" #' )) #' head(sample_meta_tbl) #' meta_tbl_to_dat(sample_meta_tbl) diff --git a/README.Rmd b/README.Rmd index d27289c..7ee0e73 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,23 +13,26 @@ knitr::opts_chunk$set( ) ``` -# reneeTools +# MOSuite -[![R-CMD-check](https://github.com/CCBR/reneeTools/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/CCBR/reneeTools/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/CCBR/MOSuite/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/CCBR/MOSuite/actions/workflows/R-CMD-check.yaml) -Helper functions for [RENEE](https://github.com/CCBR/renee): RNA sequencing analysis pipeline +R package for downstream multi-omics analysis + +Designed for differential [RNA-seq](https://github.com/CCBR/RENEE) analysis +or any data represented in a counts table. ## Installation -You can install the development version of reneeTools from [GitHub](https://github.com/) with: +You can install the development version of MOSuite from [GitHub](https://github.com/) with: ```r # install.packages("devtools") -devtools::install_github("CCBR/reneeTools") +devtools::install_github("CCBR/MOSuite") ``` ## Help & Contributing -Come across a **bug**? Open an [issue](https://github.com/CCBR/reneeTools/issues) and include a minimal reproducible example. +Come across a **bug**? Open an [issue](https://github.com/CCBR/MOSuite/issues) and include a minimal reproducible example. -Have a **question**? Ask it in [discussions](https://github.com/CCBR/reneeTools/discussions). +Have a **question**? Ask it in [discussions](https://github.com/CCBR/MOSuite/discussions). Want to **contribute** to this project? Check out the [contributing guidelines](.github/CONTRIBUTING.md). ## Development Roadmap -![](./man/figures/reneeTools-development-plan.png) +![](./man/figures/development-plan.png) - [dev spreadsheet](https://nih-my.sharepoint.com/:x:/g/personal/homanpj_nih_gov/ETvHXgnwxExEpcP57Jj9_EwBHBvZBqNuZ_c3eu51w-SlnA?e=PcXKU8) - [project board](https://github.com/orgs/CCBR/projects/32) diff --git a/README.md b/README.md index 5689143..b32ae90 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,27 @@ -# reneeTools +# MOSuite -[![R-CMD-check](https://github.com/CCBR/reneeTools/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/CCBR/reneeTools/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/CCBR/MOSuite/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/CCBR/MOSuite/actions/workflows/R-CMD-check.yaml) -Helper functions for [RENEE](https://github.com/CCBR/renee): RNA -sequencing analysis pipeline +R package for downstream multi-omics analysis + +Designed for differential [RNA-seq](https://github.com/CCBR/RENEE) +analysis or any data represented in a counts table. ## Installation -You can install the development version of reneeTools from +You can install the development version of MOSuite from [GitHub](https://github.com/) with: ``` r # install.packages("devtools") -devtools::install_github("CCBR/reneeTools") +devtools::install_github("CCBR/MOSuite") ``` @@ -35,18 +37,18 @@ library(reneeTools) ## Help & Contributing Come across a **bug**? Open an -[issue](https://github.com/CCBR/reneeTools/issues) and include a minimal +[issue](https://github.com/CCBR/MOSuite/issues) and include a minimal reproducible example. Have a **question**? Ask it in -[discussions](https://github.com/CCBR/reneeTools/discussions). +[discussions](https://github.com/CCBR/MOSuite/discussions). Want to **contribute** to this project? Check out the [contributing guidelines](.github/CONTRIBUTING.md). ## Development Roadmap -![](./man/figures/reneeTools-development-plan.png) +![](./man/figures/development-plan.png) - [dev spreadsheet](https://nih-my.sharepoint.com/:x:/g/personal/homanpj_nih_gov/ETvHXgnwxExEpcP57Jj9_EwBHBvZBqNuZ_c3eu51w-SlnA?e=PcXKU8) diff --git a/_pkgdown.yml b/_pkgdown.yml index 969f8b2..52a8943 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: https://ccbr.github.io/reneeTools/ +url: https://ccbr.github.io/MOSuite/ template: bootstrap: 5 bootswatch: yeti diff --git a/data-raw/gene_counts.R b/data-raw/gene_counts.R index 3390b34..1e45fa3 100644 --- a/data-raw/gene_counts.R +++ b/data-raw/gene_counts.R @@ -1,3 +1,3 @@ # WT_S1.RSEM.genes.results was generated from running RENEE v2.5.3 on the test dataset https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests -gene_counts <- readr::read_tsv(system.file("inst", "extdata", "RSEM.genes.expected_count.all_samples.txt.gz", package = "reneeTools")) +gene_counts <- readr::read_tsv(system.file("inst", "extdata", "RSEM.genes.expected_count.all_samples.txt.gz", package = "MOSuite")) usethis::use_data(gene_counts, overwrite = TRUE) diff --git a/data-raw/nidap.R b/data-raw/nidap.R index 5e1e6b7..f282d65 100644 --- a/data-raw/nidap.R +++ b/data-raw/nidap.R @@ -2,20 +2,20 @@ nidap_clean_raw_counts <- readr::read_csv(system.file( "extdata", "nidap", "Clean_Raw_Counts.csv.gz", - package = "reneeTools" + package = "MOSuite" )) usethis::use_data(nidap_clean_raw_counts, overwrite = TRUE) nidap_sample_metadata <- readr::read_csv(system.file( "extdata", "nidap", "Sample_Metadata_Bulk_RNA-seq_Training_Dataset_CCBR.csv.gz", - package = "reneeTools" + package = "MOSuite" )) usethis::use_data(nidap_sample_metadata, overwrite = TRUE) nidap_filtered_counts <- readr::read_csv(system.file( "extdata", "nidap", "Filtered_Counts.csv.gz", - package = "reneeTools" + package = "MOSuite" )) usethis::use_data(nidap_filtered_counts, overwrite = TRUE) diff --git a/man/reneeTools-package.Rd b/man/MOSuite-package.Rd similarity index 61% rename from man/reneeTools-package.Rd rename to man/MOSuite-package.Rd index 50908a4..85ccbd3 100644 --- a/man/reneeTools-package.Rd +++ b/man/MOSuite-package.Rd @@ -1,20 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reneeTools-package.R +% Please edit documentation in R/MOSuite-package.R \docType{package} -\name{reneeTools-package} -\alias{reneeTools} -\alias{reneeTools-package} -\title{reneeTools: R helper functions for RENEE} +\name{MOSuite-package} +\alias{MOSuite} +\alias{MOSuite-package} +\title{MOSuite: R helper functions for RENEE} \description{ -\code{reneeTools} implements helper functions for RENEE, a comprehensive +\code{MOSuite} implements helper functions for RENEE, a comprehensive quality-control and quantification RNA-seq pipeline } \seealso{ Useful links: \itemize{ - \item \url{https://github.com/CCBR/reneeTools} - \item \url{https://ccbr.github.io/reneeTools/} - \item Report bugs at \url{https://github.com/CCBR/reneeTools/issues} + \item \url{https://github.com/CCBR/MOSuite} + \item \url{https://ccbr.github.io/MOSuite/} + \item Report bugs at \url{https://github.com/CCBR/MOSuite/issues} } } diff --git a/man/calc_cpm.Rd b/man/calc_cpm.Rd index fcce4f5..ea9bb5f 100644 --- a/man/calc_cpm.Rd +++ b/man/calc_cpm.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/counts.R \name{calc_cpm} \alias{calc_cpm} -\title{Calculate counts-per-million (CPM) on raw counts in a reneeDataSet} +\title{Calculate counts-per-million (CPM) on raw counts in a multiOmicDataSet} \usage{ -calc_cpm(renee_ds, ...) +calc_cpm(moo, ...) } \arguments{ -\item{renee_ds}{reneeDataSet object} +\item{moo}{multiOmicDataSet object} \item{...}{additional arguments to pass to edgeR::cpm()} } \value{ -reneeDataSet with cpm-transformed counts +multiOmicDataSet with cpm-transformed counts } \description{ -Calculate counts-per-million (CPM) on raw counts in a reneeDataSet +Calculate counts-per-million (CPM) on raw counts in a multiOmicDataSet } \examples{ sample_meta <- data.frame( @@ -25,7 +25,7 @@ sample_meta <- data.frame( levels = c("wildtype", "knockout") ) ) -renee_ds <- create_reneeDataSet_from_dataframes(sample_meta, gene_counts) \%>\% +moo <- create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts) \%>\% calc_cpm() -head(renee_ds@counts$cpm) +head(moo@counts$cpm) } diff --git a/man/create_reneeDataSet_from_dataframes.Rd b/man/create_multiOmicDataSet_from_dataframes.Rd similarity index 67% rename from man/create_reneeDataSet_from_dataframes.Rd rename to man/create_multiOmicDataSet_from_dataframes.Rd index 7b6f4a2..5f4f332 100644 --- a/man/create_reneeDataSet_from_dataframes.Rd +++ b/man/create_multiOmicDataSet_from_dataframes.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/0_renee-class.R -\name{create_reneeDataSet_from_dataframes} -\alias{create_reneeDataSet_from_dataframes} -\title{Construct a reneeDataSet object from data frames} +% Please edit documentation in R/0_mo-class.R +\name{create_multiOmicDataSet_from_dataframes} +\alias{create_multiOmicDataSet_from_dataframes} +\title{Construct a multiOmicDataSet object from data frames} \usage{ -create_reneeDataSet_from_dataframes( +create_multiOmicDataSet_from_dataframes( sample_meta_dat, count_dat, sample_id_colname = "sample_id", @@ -22,10 +22,10 @@ Must contain a \code{sample_id} column.} \item{count_type}{type to assign the values of \code{gene_counts_filepath} to in the \code{counts} slot} } \value{ -reneeDataSet object +multiOmicDataSet object } \description{ -Construct a reneeDataSet object from data frames +Construct a multiOmicDataSet object from data frames } \examples{ sample_meta <- data.frame( @@ -35,5 +35,5 @@ sample_meta <- data.frame( levels = c("wildtype", "knockout") ) ) -create_reneeDataSet_from_dataframes(sample_meta, gene_counts) +create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts) } diff --git a/man/create_reneeDataSet_from_files.Rd b/man/create_multiOmicDataSet_from_files.Rd similarity index 64% rename from man/create_reneeDataSet_from_files.Rd rename to man/create_multiOmicDataSet_from_files.Rd index 5f8d001..3205f76 100644 --- a/man/create_reneeDataSet_from_files.Rd +++ b/man/create_multiOmicDataSet_from_files.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/0_renee-class.R -\name{create_reneeDataSet_from_files} -\alias{create_reneeDataSet_from_files} -\title{Construct a reneeDataSet object from tsv files.} +% Please edit documentation in R/0_mo-class.R +\name{create_multiOmicDataSet_from_files} +\alias{create_multiOmicDataSet_from_files} +\title{Construct a multiOmicDataSet object from tsv files.} \usage{ -create_reneeDataSet_from_files( +create_multiOmicDataSet_from_files( sample_meta_filepath, gene_counts_filepath, count_type = "raw", @@ -21,22 +21,22 @@ create_reneeDataSet_from_files( \item{sample_id_colname}{name of the column in \code{sample_meta_filepath} that contains the sample IDs} } \value{ -reneeDataSet object +multiOmicDataSet object } \description{ -Construct a reneeDataSet object from tsv files. +Construct a multiOmicDataSet object from tsv files. } \examples{ -renee_ds <- create_reneeDataSet_from_files( +moo <- create_multiOmicDataSet_from_files( sample_meta_filepath = system.file("extdata", "sample_metadata.tsv.gz", - package = "reneeTools" + package = "MOSuite" ), gene_counts_filepath = system.file("extdata", "RSEM.genes.expected_count.all_samples.txt.gz", - package = "reneeTools" + package = "MOSuite" ) ) -renee_ds@counts$raw \%>\% head() -renee_ds@sample_meta +moo@counts$raw \%>\% head() +moo@sample_meta } diff --git a/man/figures/reneeTools-development-plan.png b/man/figures/development-plan.png similarity index 100% rename from man/figures/reneeTools-development-plan.png rename to man/figures/development-plan.png diff --git a/man/filter_counts.Rd b/man/filter_counts.Rd index b8f6789..c085436 100644 --- a/man/filter_counts.Rd +++ b/man/filter_counts.Rd @@ -5,7 +5,7 @@ \title{Filter low counts} \usage{ filter_counts( - renee_ds, + moo, count_type = "raw", gene_names_column = "gene_id", sample_names_column = "sample_id", @@ -43,9 +43,9 @@ filter_counts( ) } \arguments{ -\item{renee_ds}{reneeDataSet object (see \code{create_reneeDataSet_from_dataframes()})} +\item{moo}{multiOmicDataSet object (see \code{create_multiOmicDataSet_from_dataframes()})} -\item{count_type}{the type of counts to use -- must be a name in the counts slot (\code{renee_ds@counts})} +\item{count_type}{the type of counts to use -- must be a name in the counts slot (\code{moo@counts})} \item{gene_names_column}{The column from your input Counts Matrix containing the Feature IDs (Usually Gene or Protein ID). This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts Matrix will be available to select for this parameter.} @@ -112,15 +112,15 @@ filter_counts( \item{make_plots}{whether to create plots} } \value{ -\code{reneeDataSet} with filtered counts +\code{multiOmicDataSet} with filtered counts } \description{ This is often the first step in the QC portion of an analysis to filter out features that have very low raw counts across most or all of your samples. } \details{ -This function takes a reneeDataSet containing raw counts and a sample -metadata table, and returns the reneeDataSet object with filtered counts. +This function takes a multiOmicDataSet containing raw counts and a sample +metadata table, and returns the multiOmicDataSet object with filtered counts. It also produces an image consisting of three QC plots. You can tune the threshold for tuning how low counts for a given gene are @@ -135,7 +135,7 @@ samples; and (3) Similarity Heatmap: the overall similarity of samples to one another based on unsupervised clustering. } \examples{ -renee_ds <- create_reneeDataSet_from_dataframes( +moo <- create_multiOmicDataSet_from_dataframes( as.data.frame(nidap_sample_metadata), as.data.frame(nidap_clean_raw_counts), sample_id_colname = "Sample" @@ -145,6 +145,6 @@ renee_ds <- create_reneeDataSet_from_dataframes( sample_names_column = "Sample", gene_names_column = "Gene" ) -head(renee_ds@counts$filt) +head(moo@counts$filt) } diff --git a/man/meta_tbl_to_dat.Rd b/man/meta_tbl_to_dat.Rd index bc2b3ca..1eee614 100644 --- a/man/meta_tbl_to_dat.Rd +++ b/man/meta_tbl_to_dat.Rd @@ -20,7 +20,7 @@ Convert sample metadata from a tibble to a dataframe with sample IDs as row name \examples{ sample_meta_tbl <- readr::read_tsv(system.file("extdata", "sample_metadata.tsv.gz", - package = "reneeTools" + package = "MOSuite" )) head(sample_meta_tbl) meta_tbl_to_dat(sample_meta_tbl) diff --git a/man/reneeDataSet.Rd b/man/multiOmicDataSet.Rd similarity index 67% rename from man/reneeDataSet.Rd rename to man/multiOmicDataSet.Rd index 6f6e9f5..be709fa 100644 --- a/man/reneeDataSet.Rd +++ b/man/multiOmicDataSet.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/0_renee-class.R -\name{reneeDataSet} -\alias{reneeDataSet} -\title{reneeDataSet class} +% Please edit documentation in R/0_mo-class.R +\name{multiOmicDataSet} +\alias{multiOmicDataSet} +\title{multiOmicDataSet class} \usage{ -reneeDataSet(sample_meta_dat, counts_lst) +multiOmicDataSet(sample_meta_dat, counts_lst) } \arguments{ \item{sample_meta_dat}{sample metadata as a data frame or tibble. @@ -13,5 +13,5 @@ Must contain a \code{sample_id} column.} \item{counts_lst}{named list of dataframes containing counts, e.g. expected gene counts from RSEM. Each data frame is expected to contain a \code{gene_id} column and a column for each sample ID in the metadata.} } \description{ -reneeDataSet class +multiOmicDataSet class } diff --git a/man/run_deseq2.Rd b/man/run_deseq2.Rd index 1f6d772..7c845a4 100644 --- a/man/run_deseq2.Rd +++ b/man/run_deseq2.Rd @@ -2,34 +2,34 @@ % Please edit documentation in R/deseq2.R \name{run_deseq2} \alias{run_deseq2} -\title{Run DESeq2 on a reneeDataSet} +\title{Run DESeq2 on a multiOmicDataSet} \usage{ -run_deseq2(renee_ds, design, ...) +run_deseq2(moo, design, ...) } \arguments{ -\item{renee_ds}{reneeDataSet object} +\item{moo}{multiOmicDataSet object} \item{design}{model formula for experimental design. Columns must exist in \code{meta_dat}.} \item{...}{remaining variables are forwarded to \code{DESeq2::DESeq()}.} } \value{ -reneeDataSet object with DESeq2 slot filled +multiOmicDataSet object with DESeq2 slot filled } \description{ -Run DESeq2 on a reneeDataSet +Run DESeq2 on a multiOmicDataSet } \examples{ \dontrun{ -renee_ds <- create_reneeDataSet_from_files( +moo <- create_multiOmicDataSet_from_files( system.file("extdata", "sample_metadata.tsv.gz", - package = "reneeTools" + package = "MOSuite" ), system.file("extdata", "RSEM.genes.expected_count.all_samples.txt.gz", - package = "reneeTools" + package = "MOSuite" ) ) \%>\% filter_counts() -renee_ds <- run_deseq2(renee_ds, ~condition) +moo <- run_deseq2(moo, ~condition) } } diff --git a/tests/testthat.R b/tests/testthat.R index 4dc4414..109f1dc 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -7,6 +7,6 @@ # * https://testthat.r-lib.org/articles/special-files.html library(testthat) -library(reneeTools) +library(MOSuite) -test_check("reneeTools") +test_check("MOSuite") diff --git a/tests/testthat/test-0_renee-class.R b/tests/testthat/test-0_renee-class.R index 4f6e9d7..337df60 100644 --- a/tests/testthat/test-0_renee-class.R +++ b/tests/testthat/test-0_renee-class.R @@ -1,7 +1,7 @@ -test_that("reneeDataSet from files works", { - rds <- create_reneeDataSet_from_files( - system.file("extdata", "sample_metadata.tsv.gz", package = "reneeTools"), - system.file("extdata", "RSEM.genes.expected_count.all_samples.txt.gz", package = "reneeTools") +test_that("multiOmicDataSet from files works", { + rds <- create_multiOmicDataSet_from_files( + system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite"), + system.file("extdata", "RSEM.genes.expected_count.all_samples.txt.gz", package = "MOSuite") ) %>% suppressMessages() expect_equal( @@ -30,7 +30,7 @@ test_that("reneeDataSet from files works", { )) }) -test_that("reneeDataSet from data frames detect problems", { +test_that("multiOmicDataSet from data frames detect problems", { sample_meta <- data.frame( sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"), condition = factor( @@ -39,7 +39,7 @@ test_that("reneeDataSet from data frames detect problems", { ) ) expect_error( - create_reneeDataSet_from_dataframes(sample_meta, gene_counts[, 1:4]), + create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts[, 1:4]), "Not all columns" ) }) diff --git a/tests/testthat/test-counts.R b/tests/testthat/test-counts.R index 1b7318d..010037b 100644 --- a/tests/testthat/test-counts.R +++ b/tests/testthat/test-counts.R @@ -26,18 +26,18 @@ test_that("calc_cpm works on RENEE data", { levels = c("wildtype", "knockout") ) ) - renee_ds <- create_reneeDataSet_from_dataframes( + moo <- create_multiOmicDataSet_from_dataframes( sample_meta, gene_counts %>% dplyr::select(-GeneName) ) - renee_ds <- renee_ds %>% calc_cpm() + moo <- moo %>% calc_cpm() cpm_edger <- gene_counts %>% dplyr::select(-GeneName) %>% counts_dat_to_matrix() %>% edgeR::cpm() %>% as.data.frame() %>% tibble::rownames_to_column("gene_id") - expect_equal(renee_ds@counts$cpm, cpm_edger) + expect_equal(moo@counts$cpm, cpm_edger) }) test_that("calc_cpm_df works on NIDAP data", { diff --git a/tests/testthat/test-deseq2.R b/tests/testthat/test-deseq2.R index 08e8633..7f7fe7a 100644 --- a/tests/testthat/test-deseq2.R +++ b/tests/testthat/test-deseq2.R @@ -1,28 +1,28 @@ set.seed(20231228) -renee_ds <- create_reneeDataSet_from_files( +moo <- create_multiOmicDataSet_from_files( sample_meta_filepath = system.file("extdata", "sample_metadata.tsv.gz", - package = "reneeTools" + package = "MOSuite" ), gene_counts_filepath = system.file( "extdata", "RSEM.genes.expected_count.all_samples.txt.gz", - package = "reneeTools" + package = "MOSuite" ) ) %>% suppressMessages() -renee_ds@sample_meta <- renee_ds@sample_meta %>% +moo@sample_meta <- moo@sample_meta %>% dplyr::mutate(condition = factor(condition, levels = c("wildtype", "knockout") )) test_that("run_deseq2 works", { expect_error( - run_deseq2(renee_ds, design = ~condition), - "renee_ds must contain filtered counts" + run_deseq2(moo, design = ~condition), + "moo must contain filtered counts" ) min_count <- 10 - genes_above_threshold <- renee_ds@counts$raw %>% + genes_above_threshold <- moo@counts$raw %>% tidyr::pivot_longer(!tidyselect::any_of(c("gene_id", "GeneName")), names_to = "sample_id", values_to = "count" ) %>% @@ -30,12 +30,12 @@ test_that("run_deseq2 works", { dplyr::summarize(count_sum = sum(count)) %>% dplyr::filter(count_sum >= min_count) %>% dplyr::pull(gene_id) - renee_ds@counts$filt <- renee_ds@counts$raw %>% + moo@counts$filt <- moo@counts$raw %>% dplyr::filter(gene_id %in% (genes_above_threshold)) - renee_ds <- renee_ds %>% - run_deseq2(renee_ds, design = ~condition, fitType = "local", gene_colname = "gene_id") %>% + moo <- moo %>% + run_deseq2(moo, design = ~condition, fitType = "local", gene_colname = "gene_id") %>% suppressMessages() - dds <- renee_ds@analyses$deseq2_ds + dds <- moo@analyses$deseq2_ds # check colData expect_equal( diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index 997d424..cab1a13 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -9,7 +9,7 @@ equal_dfs <- function(x, y) { test_that("filter_counts reproduces NIDAP results", { set.seed(10) - renee_ds <- create_reneeDataSet_from_dataframes( + moo <- create_multiOmicDataSet_from_dataframes( as.data.frame(nidap_sample_metadata), as.data.frame(nidap_clean_raw_counts), sample_id_colname = "Sample" @@ -19,7 +19,7 @@ test_that("filter_counts reproduces NIDAP results", { sample_names_column = "Sample", gene_names_column = "Gene" ) - rds_counts_filt <- renee_ds@counts$filt %>% + rds_counts_filt <- moo@counts$filt %>% dplyr::arrange(desc(Gene)) nidap_counts_filt <- as.data.frame(nidap_filtered_counts) %>% dplyr::arrange(desc(Gene)) @@ -30,15 +30,15 @@ test_that("filter_counts reproduces NIDAP results", { # TODO get filter_counts() to work on tibbles too, not only dataframes test_that("filter_counts works on RENEE dataset", { - renee_ds <- create_reneeDataSet_from_files( - system.file("extdata", "sample_metadata.tsv.gz", package = "reneeTools"), + moo <- create_multiOmicDataSet_from_files( + system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite"), system.file( "extdata", "RSEM.genes.expected_count.all_samples.txt.gz", - package = "reneeTools" + package = "MOSuite" ) ) - rds2 <- renee_ds %>% filter_counts( + rds2 <- moo %>% filter_counts( gene_names_column = "gene_id", sample_names_column = "sample_id", group_column = "condition", diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 65094c2..b07b31a 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ``` ```{r setup} -library(reneeTools) +library(MOSuite) library(dplyr) ``` @@ -23,19 +23,19 @@ library(dplyr) # replace these lines with the actual paths to your files gene_counts_tsv <- system.file("extdata", "RSEM.genes.expected_count.all_samples.txt.gz", - package = "reneeTools" + package = "MOSuite" ) metadata_tsv <- system.file("extdata", "sample_metadata.tsv.gz", - package = "reneeTools" + package = "MOSuite" ) -# create reneeDataSet object -renee_ds <- create_reneeDataSet_from_files( +# create multi-omic object +moo <- create_multiOmicDataSet_from_files( sample_meta_filepath = metadata_tsv, gene_counts_filepath = gene_counts_tsv ) -renee_ds <- renee_ds %>% filter_counts( +moo <- moo %>% filter_counts( group_column = "condition", label_column = "sample_id", columns_to_include = c("gene_id", "KO_S3", "KO_S4", "WT_S1", "WT_S2"), @@ -44,5 +44,5 @@ renee_ds <- renee_ds %>% filter_counts( minimum_number_of_samples_with_nonzero_counts_in_a_group = 1, ) -renee_ds@counts$filt %>% head() +moo@counts$filt %>% head() ``` diff --git a/vignettes/memory.Rmd b/vignettes/memory.Rmd index 68d4033..32ed312 100644 --- a/vignettes/memory.Rmd +++ b/vignettes/memory.Rmd @@ -25,7 +25,7 @@ library(dplyr) library(ggplot2) library(glue) library(readr) -library(reneeTools) +library(MOSuite) ``` Dataset from: @@ -33,18 +33,18 @@ Dataset from: ```{r data} counts <- read_tsv(system.file( "extdata", "LIHC_HTseqCounts.txt.gz", - package = "reneeTools" + package = "MOSuite" )) %>% rename(gene_id = Gene) metadat <- read_tsv(system.file( "extdata", "LIHC_PatientData.txt.gz", - package = "reneeTools" + package = "MOSuite" )) %>% select(-sample_id) %>% rename(sample_id = barcode) -renee_ds <- create_reneeDataSet_from_dataframes(metadat, counts) %>% +moo <- create_multiOmicDataSet_from_dataframes(metadat, counts) %>% calc_cpm() -renee_ds@counts$filt <- renee_ds@counts$raw +moo@counts$filt <- moo@counts$raw # filter_counts( # sample_names_column = "sample_id", # gene_names_column = "gene_id", @@ -57,21 +57,21 @@ renee_ds@counts$filt <- renee_ds@counts$raw ```{r subset} -subset_data <- function(renee_ds, nsamples) { - renee_ds_subset <- renee_ds - renee_ds_subset@sample_meta <- renee_ds@sample_meta %>% slice_sample(n = nsamples) - for (count_type in names(renee_ds@counts)) { - renee_ds_subset@counts[[count_type]] <- renee_ds@counts[[count_type]] %>% - select(gene_id, all_of(renee_ds_subset@sample_meta$sample_id)) +subset_data <- function(moo, nsamples) { + moo_subset <- moo + moo_subset@sample_meta <- moo@sample_meta %>% slice_sample(n = nsamples) + for (count_type in names(moo@counts)) { + moo_subset@counts[[count_type]] <- moo@counts[[count_type]] %>% + select(gene_id, all_of(moo_subset@sample_meta$sample_id)) } - return(renee_ds_subset) + return(moo_subset) } -subset_mem <- lapply(c(10, 50, 100, 200, nrow(renee_ds@sample_meta)), function(nsamples) { - renee_ds_subset <- subset_data(renee_ds, nsamples) +subset_mem <- lapply(c(10, 50, 100, 200, nrow(moo@sample_meta)), function(nsamples) { + moo_subset <- subset_data(moo, nsamples) return(tibble( n_samples = nsamples, - object_size = lobstr::obj_size(renee_ds_subset) + object_size = lobstr::obj_size(moo_subset) )) }) %>% bind_rows() @@ -117,7 +117,7 @@ subset_mem %>% caption = glue( "The object contains sample metadata and count data as raw, CPM-transformed, and filtered counts.\n", "Each dataset has the same number of genes: ", - format(length(renee_ds@counts$raw$gene_id), big.mark = ",") + format(length(moo@counts$raw$gene_id), big.mark = ",") ) ) + theme_bw() + From 1214d99ea044ff10392fd306d78e3e0b6a6f7209 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:43:21 +0000 Subject: [PATCH 2/5] chore: update CITATION.cff --- CITATION.cff | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 4d30967..e28f0e6 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -7,7 +7,7 @@ cff-version: 1.2.0 message: 'To cite package "MOSuite" in publications use:' type: software license: MIT -title: 'MOSuite: Helper functions for RENEE' +title: 'MOSuite: R package for downstream multi-omics analysis' version: 0.0.0.9000 abstract: TODO What the package does (one paragraph). authors: @@ -320,7 +320,7 @@ references: title: S7 abstract: 'S7: An Object Oriented System Meant to Become a Successor to S3 and S4' notes: Imports - url: https://github.com/rconsortium/S7/ + url: https://rconsortium.github.io/S7/ repository: https://CRAN.R-project.org/package=S7 authors: - family-names: Vaughan From beeba2cdeb9e7b05d694c9b6af079cf380e419dc Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 19 Nov 2024 10:52:11 -0500 Subject: [PATCH 3/5] chore: link to PR in news --- .Rbuildignore | 2 +- NEWS.md | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 5b5c7d2..245a301 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,4 @@ -^reneeTools\.Rproj$ +^MOSuite\.Rproj$ ^\.Rproj\.user$ ^LICENSE\.md$ ^_pkgdown\.yml$ diff --git a/NEWS.md b/NEWS.md index bcbabc2..95e6a50 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ This is the first release of MOSuite 🎉 -- Create a `NEWS.md` file to track changes to the package. +- Note: at the start of development, this package was called reneeTools. + Later it was renamed to MOSuite. (#76, @kelly-sovacool) ## Main functions & classes @@ -12,6 +13,3 @@ This is the first release of MOSuite 🎉 - `run_deseq2()` - `calc_cpm()` (#38, @kelly-sovacool) - `filter_counts()` (#38, @kelly-sovacool) - -Note: at the start of development, this package was called reneeTools. -Later it was renamed to MOSuite. () From 34e8757a183d90b4ce54d71905faf2fdf47d441f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 19 Nov 2024 10:55:28 -0500 Subject: [PATCH 4/5] docs: add Phil to authors list Co-authored-by: Philip Homan --- DESCRIPTION | 7 ++++--- _pkgdown.yml | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b934b81..641b64e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,10 +2,11 @@ Package: MOSuite Title: R package for downstream multi-omics analysis Version: 0.0.0.9000 Authors@R: c( - person("Vishal", "Koparde", , "vishal.koparde@nih.gov", role = c("aut", "cre"), - comment = c(ORCID = "0000-0001-8978-8495")), - person("Kelly", "Sovacool", , "kelly.sovacool@nih.gov", role = "aut", + person("Kelly", "Sovacool", , "kelly.sovacool@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3283-829X")), + person("Philip", "Homan", , "philip.homan@nih.gov", role = "aut"), + person("Vishal", "Koparde", , "vishal.koparde@nih.gov", role = "aut", + comment = c(ORCID = "0000-0001-8978-8495")), person("Samantha", "Chill", , "samantha.chill@nih.gov", role = "aut", comment = c(ORCID = "0000-0002-8734-9875")), person("CCR Collaborative Bioinformatics Resource", role = "cph") diff --git a/_pkgdown.yml b/_pkgdown.yml index 52a8943..2044c50 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -5,10 +5,12 @@ template: development: mode: auto authors: - Vishal Koparde: - href: "https://github.com/kopardev" Kelly Sovacool: href: "https://github.com/kelly-sovacool" + Philip Homan: + href: "https://github.com/phoman14" + Vishal Koparde: + href: "https://github.com/kopardev" CCR Collaborative Bioinformatics Resource: href: "https://github.com/CCBR" footer: From 4759a1c4691bb408c2ce3e1bf8e454cc67e4033b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:57:43 +0000 Subject: [PATCH 5/5] chore: update CITATION.cff --- CITATION.cff | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index e28f0e6..fd588d3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -11,14 +11,17 @@ title: 'MOSuite: R package for downstream multi-omics analysis' version: 0.0.0.9000 abstract: TODO What the package does (one paragraph). authors: -- family-names: Koparde - given-names: Vishal - email: vishal.koparde@nih.gov - orcid: https://orcid.org/0000-0001-8978-8495 - family-names: Sovacool given-names: Kelly email: kelly.sovacool@nih.gov orcid: https://orcid.org/0000-0003-3283-829X +- family-names: Homan + given-names: Philip + email: philip.homan@nih.gov +- family-names: Koparde + given-names: Vishal + email: vishal.koparde@nih.gov + orcid: https://orcid.org/0000-0001-8978-8495 - family-names: Chill given-names: Samantha email: samantha.chill@nih.gov @@ -26,10 +29,10 @@ authors: repository-code: https://github.com/CCBR/MOSuite url: https://ccbr.github.io/MOSuite/ contact: -- family-names: Koparde - given-names: Vishal - email: vishal.koparde@nih.gov - orcid: https://orcid.org/0000-0001-8978-8495 +- family-names: Sovacool + given-names: Kelly + email: kelly.sovacool@nih.gov + orcid: https://orcid.org/0000-0003-3283-829X references: - type: software title: 'R: A Language and Environment for Statistical Computing'