diff --git a/.Rbuildignore b/.Rbuildignore index be665f6..1e97b27 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,4 +3,8 @@ ^\.travis\.yml$ ^_pkgdown.yaml ^docs -^update_examples.sh \ No newline at end of file +^update_examples.sh +^doc$ +^Meta$ +^\.github$ +long_vignettes \ No newline at end of file diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml new file mode 100644 index 0000000..06dfeb8 --- /dev/null +++ b/.github/workflows/check-bioc.yml @@ -0,0 +1,249 @@ +## Read more about the features of this GitHub Actions workflow +## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action +## +## For more details, check the biocthis developer notes vignette at +## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html +## +## You can add this workflow to other packages using: +## > biocthis::use_bioc_github_action() +## +## Using GitHub Actions exposes you to many details about how R packages are +## compiled and installed in several operating systems. +## If you need help, please follow the steps listed at +## https://github.com/r-lib/actions#where-to-find-help +## +## If you found an issue specific to biocthis's GHA workflow, please report it +## with the information that will make it easier for others to help you. +## Thank you! + +## Acronyms: +## * GHA: GitHub Action +## * OS: operating system + +on: + push: + branches: [master, dev] + pull_request: + branches: [master, dev] + +name: R-CMD-check-bioc + +## These environment variables control whether to run GHA code later on that is +## specific to testthat, covr, and pkgdown. +## +## If you need to clear the cache of packages, update the number inside +## cache-version as discussed at https://github.com/r-lib/actions/issues/86. 
+## Note that you can always run a GHA test without the cache by using the word +## "/nocache" in the commit message. +env: + has_testthat: 'false' + run_covr: 'false' + run_pkgdown: 'false' + has_RUnit: 'false' + cache-version: 'cache-v1' + +jobs: + build-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + ## Environment variables unique to this job. + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: '4.0.2', cont: "bioconductor/bioconductor_docker:RELEASE_3_11", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: macOS-latest, r: '4.0.2'} + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + NOT_CRAN: true + TZ: UTC + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + + ## Set the R library to the directory matching the + ## R packages cache step further below when running on Docker (Linux). + - name: Set R Library home on Linux + if: runner.os == 'Linux' + run: | + mkdir /__w/_temp/Library + echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile + + ## Most of these steps are the same as the ones in + ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml + ## If they update their steps, we will also need to update ours. 
+ - name: Checkout Repository + uses: actions/checkout@v2 + + ## R is already included in the Bioconductor docker images + - name: Setup R from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.config.r }} + + ## pandoc is already included in the Bioconductor docker images + - name: Setup pandoc from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_11-r-4.0.2-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_11-r-4.0.2- + + - name: Cache R packages on Linux + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " + uses: actions/cache@v1 + with: + path: /home/runner/work/_temp/Library + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_11-r-4.0.2-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_11-r-4.0.2- + + - name: Install Linux system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + Rscript -e "remotes::install_github('r-hub/sysreqs')" + sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") + sudo -s eval "$sysreqs" + + - name: Install macOS system dependencies + if: matrix.config.os == 'macOS-latest' + run: | + ## Enable installing XML from source if needed + brew install libxml2 + echo "::set-env name=XML_CONFIG::/usr/local/opt/libxml2/bin/xml2-config" + + ## Required to install magick as noted at + ## 
https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + - name: Install Windows system dependencies + if: runner.os == 'Windows' + run: | + ## Edit below if you have any Windows system dependencies + shell: Rscript {0} + + - name: Install BiocManager + run: | + message(paste('****', Sys.time(), 'installing BiocManager ****')) + remotes::install_cran("BiocManager") + shell: Rscript {0} + + - name: Set BiocVersion + run: | + BiocManager::install(version = "3.11", ask = FALSE) + shell: Rscript {0} + + - name: Install dependencies + run: | + ## Try installing the package dependencies in steps. First the local + ## dependencies, then any remaining dependencies to avoid the + ## issues described at + ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html + ## https://github.com/r-lib/remotes/issues/296 + ## Ideally, all dependencies should get installed in the first pass. + ## Pass #1 at installing dependencies + message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE) + + ## Pass #2 at installing dependencies + message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE) + + ## For running the checks + message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) + remotes::install_cran("rcmdcheck") + BiocManager::install("BiocCheck") + shell: Rscript {0} + + - name: Install BiocGenerics + if: env.has_RUnit == 'true' + run: | + ## Install BiocGenerics + BiocManager::install("BiocGenerics") + shell: Rscript {0} + + - name: Install covr + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && 
runner.os == 'Linux' + run: | + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Install pkgdown + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("pkgdown") + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - name: Run CMD check + env: + _R_CHECK_CRAN_INCOMING_: false + run: | + rcmdcheck::rcmdcheck( + args = c("--no-build-vignettes", "--no-manual", "--timings"), + build_args = c("--no-manual", "--no-resave-data"), + error_on = "warning", + check_dir = "check" + ) + shell: Rscript {0} + + ## Might need to add this to the if: && runner.os == 'Linux' + - name: Reveal testthat details + if: env.has_testthat == 'true' + run: find . -name testthat.Rout -exec cat '{}' ';' + + - name: Run RUnit tests + if: env.has_RUnit == 'true' + run: | + BiocGenerics:::testPackage() + shell: Rscript {0} + + - name: Run BiocCheck + run: | + BiocCheck::BiocCheck( + dir('check', 'tar.gz$', full.names = TRUE), + `quit-with-status` = TRUE, + `no-check-R-ver` = TRUE, + `no-check-bioc-help` = TRUE + ) + shell: Rscript {0} + + - name: Test coverage + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + covr::codecov() + shell: Rscript {0} + + - name: Install package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: R CMD INSTALL . 
+ + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-biocversion-RELEASE_3_11-r-4.0.2-results + path: check diff --git a/.gitignore b/.gitignore index beeb841..8e4f40f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ + inst/doc .Rproj.user .Rhistory @@ -5,3 +6,5 @@ inst/doc .DS_Store docs *.RProj +doc +Meta \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 8cbe59f..0175711 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,14 @@ -language: R -sudo: false +language: r +branches: + only: + - master + - dev +r: bioc-release +os: + - linux cache: packages warnings_are_errors: false r_packages: devtools -r_github_packages: - - pepkit/pepr +r_github_packages: - databio/simpleCache -bioc_packages: - - GenomicRanges - - BiocStyle - - BiocFileCache - + - pepkit/pepr \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 29ec4c3..6fd5380 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,18 @@ Package: BiocProject Title: Bioconductor Management with Portable Encapsulated Project (PEP) Objects -Version: 0.2 +Version: 0.2.1 Authors@R: c(person("Michal", "Stolarczyk", email = "mjs5kd@virginia.edu",role = c("aut", "cre")), person("Nathan", "Sheffield", email = "nathan@code.databio.org",role = c("aut"))) Description: A Bioconductor-oriented project management class. It wraps the generic pepr R package for project metadata. BiocProject allows you to read in project metadata and data for an entire project with a single line of R code. 
-License: GPL-3 +License: BSD_2_clause + file LICENSE Encoding: UTF-8 LazyData: true Depends: S4Vectors, pepr, methods -Suggests: - knitr, - rmarkdown, - testthat, - yaml -Enhances: BiocFileCache, simpleCache, GenomicRanges +Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown biocViews: DataImport, DataRepresentation -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 URL: https://github.com/pepkit/BiocProject BugReports: https://github.com/pepkit/BiocProject VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1097411 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2018 +COPYRIGHT HOLDER: Nathan Sheffield \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index cbc618c..9dc4c23 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,12 +2,12 @@ export(.insertPEP) export(.setShowMethod) -export(.updateList) +export(.unionList) export(BiocProject) exportMethods(config) exportMethods(getProject) exportMethods(is) -exportMethods(samples) +exportMethods(sampleTable) import(S4Vectors) import(methods) import(pepr) diff --git a/NEWS.md b/NEWS.md index d84d12e..5c9c9d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ + +# BiocProject 0.2.1 - 2019-10-15 + +## Added +* added vignette: "Using BiocProject with tximeta" + +## Changed +* renamed the `subproject` argument of the `BiocProject` function to `amendments` +* `Project` is no longer added as the first element in `Annotated@metadata`; it is appended to any existing ones + # BiocProject 0.2 - 2019-04-19 ## Added @@ -71,4 +81,4 @@ * make `BiocProject` class inherit from `pepr::Project` and `base::list` * the `initialize` method can read in the data with the provided `func` -* the object constructor does not fail if the `pepr::Project` object is provided in the `funcArgs` arguments list \ No newline at end of file +* the object constructor does not fail if the `pepr::Project` object is provided in the `funcArgs` arguments list diff --git a/R/constants.R b/R/constants.R 
index 8972cc9..50025a1 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,14 +1,10 @@ -# config section names -# -# The YAML file looks like this: -# -# MAIN_SECTION: -# FUNCTION_NAME: -# FUNCTION_PATH: -# FUNCTION_ARGS: -# : -# : -MAIN_SECTION = "bioconductor" +# config section names. The YAML +# file looks like this: +# BIOC_SECTION: FUNCTION_NAME: +# FUNCTION_PATH: +# FUNCTION_ARGS: : +# : +BIOC_SECTION = "bioconductor" FUNCTION_ARGS = "funcArgs" FUNCTION_PATH = "readFunPath" -FUNCTION_NAME = "readFunName" +FUNCTION_NAME = "readFunName" \ No newline at end of file diff --git a/R/functions.R b/R/functions.R index a8a1de1..40a4716 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1,4 +1,3 @@ - #' Portable Encapsulated Project (PEP) for biological applications #' #' This function creates a \code{\link[pepr]{Project-class}} object, @@ -32,7 +31,7 @@ #' The custom data processing function must take #' the \code{\link[pepr]{Project-class}} as an argument since this object will #' be passed to the function by default. However, if the function requires -#' addtional arguments, ones can be provided with the \code{funcArgs} argument +#' additional arguments, they can be provided with the \code{funcArgs} argument #' in the \code{\link{BiocProject}} function call. #' Besides, the \code{func} argument with the anonymous #' function may serve similar possibility. @@ -41,17 +40,18 @@ #' If the \code{autoLoad} is set to \code{FALSE} the data will not be loaded #' and empty \code{\link[pepr]{Project-class}} object will be returned. #' +#' @note The \code{bioconductor} section can be read from the project config +#' file or pipeline interface. The former takes priority. #' #' @section Further reading: #' Browse the #' \href{http://code.databio.org/BiocProject/articles/index.html}{\code{BiocProject} package vignettes} #' for more detailed explanation with examples. 
#' -#' #' @param file a character vector with a path to the PEP config file -#' @param subproject a character vector with a name of the subproject +#' @param amendments a character vector with a name of the amendments #' to be activated -#' @param func a anonymous function that reads and/or processess the data, +#' @param func an anonymous function that reads and/or processes the data, #' it must take #' the \code{\link[pepr]{Project-class}} as an argument. #' See \code{Details} for more information @@ -81,27 +81,30 @@ #' @seealso \url{https://pepkit.github.io/} #' @import pepr #' @export BiocProject -BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, - funcArgs = NULL) { - p = pepr::Project(file=file, subproject=subproject) +BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, + funcArgs = NULL) { + p = pepr::Project(file=file, amendments = amendments) # prevent PEP (Project object) input. This prevents BiocProject object # failing when the user provides the Project object if(is.null(funcArgs)){ funcArgs = list() }else{ - if (length(.findProjectInList(funcArgs)) > 0) + if (length(.findProjectInList(funcArgs)) > 0) { + warning("Project object was found in the arguments list. 
+ It will be removed.") funcArgs = funcArgs[-.findProjectInList(funcArgs)] + } } args = append(list(p), funcArgs) - if(pepr::checkSection(pepr::config(p), c(MAIN_SECTION, FUNCTION_ARGS))){ - args = .updateList(config(p)[[MAIN_SECTION]][[FUNCTION_ARGS]],args) + cfg = pepr::config(p) + if(pepr::.checkSection(cfg, c(BIOC_SECTION, FUNCTION_ARGS))){ + args = .unionList(config(p)[[BIOC_SECTION]][[FUNCTION_ARGS]],args) argsNames = names(args) project = args[[.findProjectInList(args)]] argsNames = append("",argsNames[-.findProjectInList(args)]) args = append(list(p), args[[-.findProjectInList(args)]]) names(args) = argsNames } - if (!is.null(func)) { # use the anonymous function if provided if (is.function(func)) { @@ -113,17 +116,17 @@ BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, } }else{ # use config to find it - if(!is.logical(autoLoad)) stop("'autoLoad' argument has to be a logical, - got '", class(autoLoad),"'") + if(!is.logical(autoLoad)) stop("'autoLoad' argument has to be a", + " logical, got '", class(autoLoad),"'") if (autoLoad) { - # check if the config consists of MAIN_SECTION section - if(!pepr::checkSection(pepr::config(p), MAIN_SECTION)){ + # check if the config consists of BIOC_SECTION section + if(!pepr::.checkSection(cfg, BIOC_SECTION)){ message("No data was read. 
Returning a Project object") warning("The config YAML is missing the '", - MAIN_SECTION,"' section.") + BIOC_SECTION,"' section.") return(p) } - funcName = pepr::config(p)[[MAIN_SECTION]][[FUNCTION_NAME]] + funcName = cfg[[BIOC_SECTION]][[FUNCTION_NAME]] # check if the function name was provided # and if it exists in the environment if (!is.null(funcName) && exists(funcName)) { @@ -132,7 +135,8 @@ BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, message("Used function '", funcName, "' from the environment") return(.insertPEP(readData, p)) }else{ - if (!is.null(funcName) && length(grep("(\\:){2,3}", funcName)) != 0) { + if (!is.null(funcName) && + length(grep("(\\:){2,3}", funcName)) != 0) { # trying to access the function from the namespace that # was specified in the config.yaml FUNCTION_NAME splitted = strsplit(funcName, ":")[[1]] @@ -147,16 +151,13 @@ BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, # trying to source the file specified # in the config.yaml FUNCTION_PATH funcPath = pepr::.expandPath( - pepr::config(p)[[MAIN_SECTION]][[FUNCTION_PATH]]) + cfg[[BIOC_SECTION]][[FUNCTION_PATH]]) if (!is.null(funcPath)){ if (!file.exists(funcPath)) - funcPath = .makeAbsPath(funcPath,dirname(p@file)) + funcPath = .makeAbsPath(funcPath, dirname(p@file)) if (!file.exists(funcPath)) - stop( - "The function does not exist in the environment and file '", - funcPath, - "' does not exist" - ) + stop("The function does not exist in the environment", + " and file '", funcPath, "' does not exist") # Load the sourced objects into a new environment, # so they are not in the .GlobalEnv after the BiocProject # function execution @@ -170,19 +171,21 @@ BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, # check again for the specified funcion name, maybe it is # defined in the file which was just sourced if (!is.null(funcName) && exists(funcName, where=e)) { - message("Function '", funcName,"' read 
from file '", funcPath, "'") + message("Function '", funcName,"' read from file '", + funcPath, "'") readData = .callBiocFun( getFunction(funcName, where=e,mustFind=TRUE), args) return(.insertPEP(readData, p)) } # the function indicated in FUNCTION_NAME was not found, # use the last one in FUNCTION_PATH - message("Multiple functions found in '", funcPath, "'. Using the last one.") + message("Multiple functions found in '", funcPath, + "'. Using the last one.") readData = .callBiocFun(lastFun, args) return(.insertPEP(readData, p)) }else{ - warning("Can't find function in the environment and the value for '" - , FUNCTION_PATH, + warning("Can't find function in the environment and the", + " value for '" , FUNCTION_PATH, "' key was not provided in the config YAML.") message("No data was read. Returning a Project object") return(p) @@ -193,54 +196,4 @@ BiocProject = function(file, subproject = NULL, autoLoad = TRUE, func = NULL, return(p) } } -} - -#' Insert a PEP metadata in a metadata slot of Annotated -#' -#' This function inserts the PEP (\code{\link[pepr]{Project-class}}) -#' into the metadata slot of objects that -#' extend the \code{\link[S4Vectors]{Annotated-class}} -#' -#' Additionally, if the object extends the -#' \code{\link[S4Vectors]{Annotated-class}} (or is a list that will be -#' automatically converted to a \code{\link[S4Vectors]{List}}) the show method -#' for its class is redefined to display the \code{\link[pepr]{Project-class}} -#' as the metadata. 
-#' -#' @param object an object of \code{\link[S4Vectors]{Annotated-class}} -#' @param pep an object of class \code{\link[pepr]{Project-class}} -#' -#' @return an object of the same class as the object argument but enriched -#' with the metadata from the pep argument -#' -#' @examples -#' # If the object is of class Annotated -#' object = S4Vectors::List(result="test") -#' result = .insertPEP(object, pepr::Project()) -#' metadata(result) -#' -#' # If the object is not of class Annotated -#' object1 = "test" -#' result1 = .insertPEP(object1, pepr::Project()) -#' metadata(result1) -#' @import S4Vectors methods -#' @export -.insertPEP = function(object, pep) { - if(!methods::is(pep, "Project")) - stop("the pep argument has to be of class 'Project', - got '", class(pep),"'") - # do we throw a warning/message saying what happens in the next line? - if(methods::is(object, "list")) - object = S4Vectors::List(object) - if(methods::is(object, "Annotated")){ - S4Vectors::metadata(object) = list(PEP=pep) - } else{ - warning("BiocProject expects data loading functions to return an 'Annotated' object, but your function returned a '", - class(object),"' object. 
To use an Annotated, this returned object has been placed in the first slot of a List") - result = S4Vectors::List(result=object) - S4Vectors::metadata(result) = list(PEP=pep) - object = result - } - .setShowMethod(object) - object -} +} \ No newline at end of file diff --git a/R/methods_Annotated.R b/R/methods_Annotated.R index 5b9c825..42de465 100644 --- a/R/methods_Annotated.R +++ b/R/methods_Annotated.R @@ -1,16 +1,15 @@ -setGeneric(".is.project", function(.Object) - standardGeneric(".is.project")) - -setMethod(".is.project","Annotated",function(.Object){ - mData = S4Vectors::metadata(.Object) - result = tryCatch(expr = { - mData[[1]] - }, error = function(e){ - FALSE - }) - is(result,"Project") -}) +setGeneric(".is.project", function(.Object) standardGeneric(".is.project")) +setMethod(".is.project", "Annotated", + function(.Object) { + mData = S4Vectors::metadata(.Object) + result = tryCatch(expr = { + mData$PEP + }, error = function(e) { + FALSE + }) + is(result, "Project") + }) #' Extract the object of \code{\link[pepr]{Project-class}} from #' the \code{\link[S4Vectors]{Annotated-class}} #' @@ -22,25 +21,26 @@ setMethod(".is.project","Annotated",function(.Object){ #' @return an object of \code{\link[pepr]{Project-class}} #' #' @examples -#' projectConfig = system.file("extdata", "example_peps-master", -#' "example_BiocProject", "project_config.yaml", package="BiocProject") +#' projectConfig = system.file('extdata', 'example_peps-master', +#' 'example_BiocProject', 'project_config.yaml', package='BiocProject') #' p=BiocProject(projectConfig) #' getProject(p) #' #' @import S4Vectors #' @exportMethod getProject -setGeneric("getProject", function(.Object) - standardGeneric("getProject")) - -#' @describeIn getProject extracts \code{\link[pepr]{Project-class}} from the \code{\link[S4Vectors]{Annotated-class}} -setMethod("getProject","Annotated",function(.Object){ - if(.is.project(.Object)) { - S4Vectors::metadata(.Object)[[1]] - } else { - stop("This object does 
not have PEP in the metadata slot.") - } -}) +setGeneric("getProject", function(.Object) standardGeneric("getProject")) +#' @describeIn getProject extracts \code{\link[pepr]{Project-class}} +#' from the \code{\link[S4Vectors]{Annotated-class}} +setMethod("getProject", "Annotated", + function(.Object) { + if (.is.project(.Object)) { + S4Vectors::metadata(.Object)$PEP + } else { + stop(.Object) + stop("This object does not have PEP in the metadata slot.") + } + }) #' View samples in the objects of \code{\link[pepr]{Project-class}} #' #' This method can be used to view the samples slot @@ -51,19 +51,16 @@ setMethod("getProject","Annotated",function(.Object){ #' #' @return a data.table with the with metadata about samples #' @examples -#' projectConfig = system.file("extdata", "example_peps-master", -#' "example_BiocProject", "project_config.yaml", package="BiocProject") +#' projectConfig = system.file('extdata', 'example_peps-master', +#' 'example_BiocProject', 'project_config.yaml', package='BiocProject') #' p=BiocProject(projectConfig) -#' samples(p) +#' sampleTable(p) #' @import pepr #' @export -setMethod( - f = "samples", - signature = "Annotated", - definition = function(object) { - pepr::samples(getProject(object)) - }) - +setMethod(f = "sampleTable", signature = "Annotated", + definition = function(object) { + pepr::sampleTable(getProject(object)) + }) #' View PEP config of the object of \code{\link[pepr]{Project-class}} #' @@ -76,27 +73,24 @@ setMethod( #' @return a list with the config file #' #' @examples -#' projectConfig = system.file("extdata", "example_peps-master", -#' "example_BiocProject", "project_config.yaml", package="BiocProject") +#' projectConfig = system.file('extdata', 'example_peps-master', +#' 'example_BiocProject', 'project_config.yaml', package='BiocProject') #' p=BiocProject(projectConfig) #' config(p) #' #' @import pepr #' @export -setMethod( - f = "config", - signature = "Annotated", - definition = function(object) { - 
pepr::config(getProject(object)) - }) - +setMethod(f = "config", signature = "Annotated", + definition = function(object) { + pepr::config(getProject(object)) + }) setGeneric("is", package = "methods") #' Is an Object from a Class? #' #' Functions to test inheritance relationships between an object and a class #' or between two classes. It uses the generic is function but overrides its -#' behavior for obejcts of class \code{\link[S4Vectors]{Annotated-class}} when +#' behavior for objects of class \code{\link[S4Vectors]{Annotated-class}} when #' testing for inheritance from \code{\link[pepr]{Project-class}} class. #' #' see the \code{\link[methods]{is}} for more details @@ -106,16 +100,17 @@ setGeneric("is", package = "methods") #' #' @return a logical #' @examples -#' object = S4Vectors::List(test="test") -#' is(object,"Annotated") +#' object = S4Vectors::List(test='test') +#' is(object,'Annotated') #' #' @import methods #' @export -setMethod("is", "Annotated", definition = function(object, class2){ - if(class2=="Project" & .is.project(object)){ +setMethod("is", "Annotated", definition = function(object, + class2) { + if (class2 == "Project" & .is.project(object)) { TRUE } else { - methods::extends(class(object), class2) + methods::extends(class(object), + class2) } }) - diff --git a/R/utils.R b/R/utils.R index cad06bf..9be2ff8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,24 +18,92 @@ message(rep("-", n), "\n") } + +#' Insert a PEP metadata in a metadata slot of Annotated +#' +#' This function inserts the PEP (\code{\link[pepr]{Project-class}}) +#' into the metadata slot of objects that +#' extend the \code{\link[S4Vectors]{Annotated-class}} +#' +#' Additionally, if the object extends the +#' \code{\link[S4Vectors]{Annotated-class}} (or is a list that will be +#' automatically converted to a \code{\link[S4Vectors]{List}}) the show method +#' for its class is redefined to display the \code{\link[pepr]{Project-class}} +#' as the metadata. 
+#' +#' @param object an object of \code{\link[S4Vectors]{Annotated-class}} +#' @param pep an object of class \code{\link[pepr]{Project-class}} +#' +#' @return an object of the same class as the object argument but enriched +#' with the metadata from the pep argument +#' +#' @examples +#' # If the object is of class Annotated +#' object = S4Vectors::List(result='test') +#' result = .insertPEP(object, pepr::Project()) +#' metadata(result) +#' +#' # If the object is not of class Annotated +#' object1 = 'test' +#' result1 = .insertPEP(object1, pepr::Project()) +#' metadata(result1) +#' @import S4Vectors methods +#' @export +.insertPEP = function(object, pep) { + if (!methods::is(pep, "Project")) + stop("the pep argument has to be of class 'Project', + got '", + class(pep), "'") + # do we throw a warning/message + # saying what happens in the + # next line? + if (methods::is(object, "list")) + object = S4Vectors::List(object) + if (methods::is(object, "Annotated")) { + S4Vectors::metadata(object) = + .unionList(S4Vectors::metadata(object), list(PEP = pep)) + } else { + warning("BiocProject expects data loading functions to return an + 'Annotated' object, but your function returned a '", + class(object), "' object. 
Therefore, this returned object has", + "been placed in the first slot of a S4Vectors::List") + result = S4Vectors::List(result = object) + S4Vectors::metadata(result) = list(PEP = pep) + object = result + } + object +} + +# Finds the pepr::Project +# object in a list and returns +# its index If it is not +# present, returns integer(0) +.findProjectInList = function(l) { + which(as.logical(lapply(l, + function(x) { + is(x, "Project") + }))) +} + # internal function that wraps the external function execution # in tryCatch to indicate problems with the external function execution .callBiocFun <- function(func, arguments) { if(!is(arguments, "list")) stop("The 'arguments' argument has to be a list, got '", - class(arguments),"'") + class(arguments),"'") .warnings = c() frameNumber <- sys.nframe() wHandler <- function(w){ # warning handler - assign(".warnings", append(.warnings,w$message), - envir = sys.frame(frameNumber)) + assign(".warnings", append(.warnings, w$message), + envir = sys.frame(frameNumber)) invokeRestart("muffleWarning") } eHandler <- function(e){ # error handler .wrapFunMessages(e$message,"error") - message("No data was read. The error message was returned instead.") + message("No data was read. The error message was returned instead: ", + e$message) S4Vectors::List(e$message) } res = withCallingHandlers( @@ -94,49 +162,54 @@ #' #' @param list1 a list to be updated #' @param list2 a list to update with +#' @param combine a logical indicating whether the elements of the second list +#' should replace (\code{FALSE}, default) or append to (\code{TRUE}) the +#' first one. 
#' #' @return an updated list #' #' @examples #' list1=list(a=1,b=2) #' list2=list(a=1,b=1,c=3) -#' .updateList(list1,list2) +#' .unionList(list1,list2) #' #' @export -.updateList = function(list1,list2) { - if((!is.list(list1)) || (!is.list(list2))) - stop("One of the arguments was not a list") +.unionList = function(list1, list2, + combine = FALSE) { + if ((!is.list(list1)) || (!is.list(list2))) + stop("One of the arguments is not a list") nms1 = names(list1) nms2 = names(list2) - if(is.null(nms2)) nms2 = "" - counter=1 - for(n in nms2){ + if (is.null(nms2)) + nms2 = "" + counter = 1 + for (n in nms2) { idx = which(nms1 == n) - if(length(idx) > 0){ - list1[[idx]] = list2[[n]] - }else{ + if (length(idx) > 0) { + if (combine) { + list1[[idx]] = append(list1[[idx]], + list2[[n]]) + } else { + list1[[idx]] = list2[[n]] + } + } else { add = list(list2[[counter]]) names(add) = n - list1 = append(list1,add) + list1 = append(list1, + add) } counter = counter + 1 } return(list1) } -# Finds the pepr::Project object in a list and returns its index -# If it is not present, returns integer(0) -.findProjectInList = function(l) { - which(as.logical(lapply(l, function(x) { - is(x, "Project") - }))) -} - #' Redefine the show method of the object #' -#' Adds the Project objects display to the default show method of an \code{\link[S4Vectors]{Annotated-class}} +#' Adds the Project objects display to the default show method +#' of an \code{\link[S4Vectors]{Annotated-class}} #' -#' The method is defined in the environment in which the function was called, see: \code{\link[base]{sys.parent}} +#' The method is defined in the environment in which the function was called, +#' see: \code{\link[base]{sys.parent}} #' #' @param returnedObject object of \code{\link[S4Vectors]{Annotated-class}} #' @@ -145,24 +218,34 @@ #' @export #' #' @examples -#' x = S4Vectors::List(c("so","cool")) +#' x = S4Vectors::List(c('so','cool')) #' metadata(x) = list(PEP=pepr::Project()) #' .setShowMethod(x) #' x 
.setShowMethod = function(returnedObject) { oriClass = class(returnedObject) - if(!is(returnedObject,"Annotated")){ - warning("The show method was not redefined for '", oriClass, "'") + if (!is(returnedObject, "Annotated")) { + warning("The show method was not redefined for '", + oriClass, "'") return(FALSE) } - oriShow = selectMethod("show", oriClass) - # the new method is created only if the environment of the original one is locked. - # this way the method will not be redefined over and over again when the BiocProject functon is called. - if(environmentIsLocked(environment(oriShow))) - setMethod("show", signature = oriClass, definition = function(object){ - do.call(oriShow, list(object)) - pep = getProject(object) - cat("\nmetadata: ") - selectMethod("show","Project")(pep) - }, where = parent.frame()) -} + oriShow = selectMethod("show", + oriClass) + # the new method is created + # only if the environment of + # the original one is locked. + # this way the method will not + # be redefined over and over + # again when the BiocProject + # functon is called. 
+ if (environmentIsLocked(environment(oriShow))) + setMethod("show", signature = oriClass, + definition = function(object) { + do.call(oriShow, + list(object)) + pep = getProject(object) + cat("\nmetadata: ") + selectMethod("show", + "Project")(pep) + }, where = parent.frame()) +} \ No newline at end of file diff --git a/README.md b/README.md index fbd45c5..3e2c5c8 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,23 @@ [![Travis-CI Build Status](https://travis-ci.org/pepkit/BiocProject.svg?branch=master)](https://travis-ci.org/pepkit/BiocProject) +[![Coverage Status](https://coveralls.io/repos/github/pepkit/BiocProject/badge.svg?branch=piface)](https://coveralls.io/github/pepkit/BiocProject?branch=piface) [![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) # Description of the BiocProject package -The `BiocProject` package is a [Bioconductor](https://www.bioconductor.org/)-oriented project management package. It wraps the generic [pepr](http://code.databio.org/pepr/) R package for project metadata. `BiocProject` allows you to read in project metadata and data for an entire project with a single line of `R` code. +The BiocProject package is a [Bioconductor](https://www.bioconductor.org/)-oriented project management package. It wraps the generic [pepr](http://code.databio.org/pepr/) R package for project metadata. 
+ +**BiocProject allows you to read in project metadata and data for an entire project with a single line of R code.** ### Quick start: Install from GitHub: -``` +```r devtools::install_github("pepkit/BiocProject") ``` -Read in both the metadata and data by passing your [PEP configuration file](http://pepkit.github.io): -``` +Read in both the metadata and data by passing your [PEP configuration file](http://pep.databio.org): +```r bp = BiocProject(file=ProjectConfig) ``` diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 950ec5c..e4f25fa 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -32,6 +32,7 @@ articles: - vignette2multipleArguments - vignette3simpleCache - vignette4remoteData + - vignette6tximeta reference: - title: "BiocProject API" @@ -41,5 +42,5 @@ reference: - title: "utilities" desc: "Extra non-exported utility functions." contents: - - .updateList + - .unionList - .insertPEP diff --git a/inst/extdata/.DS_Store b/inst/extdata/.DS_Store deleted file mode 100644 index cc1f97a..0000000 Binary files a/inst/extdata/.DS_Store and /dev/null differ diff --git a/inst/extdata/example_peps-master/example_BiocProject/project_config.yaml b/inst/extdata/example_peps-master/example_BiocProject/project_config.yaml index 7c9b9e8..4e0dc00 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/project_config.yaml +++ b/inst/extdata/example_peps-master/example_BiocProject/project_config.yaml @@ -1,5 +1,5 @@ -metadata: - sample_table: sample_table.csv +pep_version: "2.0.0" +sample_table: sample_table.csv bioconductor: readFunName: readBedFiles diff --git a/inst/extdata/example_peps-master/example_BiocProject/project_config_resize.yaml b/inst/extdata/example_peps-master/example_BiocProject/project_config_resize.yaml index 93338cd..3ef5c99 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/project_config_resize.yaml +++ b/inst/extdata/example_peps-master/example_BiocProject/project_config_resize.yaml @@ -1,5 +1,5 @@ -metadata: - sample_table: 
sample_table.csv +pep_version: "2.0.0" +sample_table: sample_table.csv bioconductor: readFunName: readBedFiles_resize diff --git a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles.R b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles.R index 363c5c0..57eaf19 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles.R +++ b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles.R @@ -1,7 +1,7 @@ readBedFiles = function(project) { cwd = getwd() - paths = pepr::samples(project)$file_path - sampleNames = pepr::samples(project)$sample_name + paths = pepr::sampleTable(project)$file_path + sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) result = lapply(paths, function(x){ df = read.table(x) diff --git a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R index 2e9718d..722888a 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R +++ b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R @@ -1,7 +1,7 @@ readBedFiles_resize = function(project, resize.width) { cwd = getwd() - paths = pepr::samples(project)$file_path - sampleNames = pepr::samples(project)$sample_name + paths = pepr::sampleTable(project)$file_path + sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) result = lapply(paths, function(x){ df = read.table(x) diff --git a/inst/extdata/example_peps-master/example_BiocProject_exceptions/project_config.yaml b/inst/extdata/example_peps-master/example_BiocProject_exceptions/project_config.yaml index 3b84909..a6e9e52 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_exceptions/project_config.yaml +++ b/inst/extdata/example_peps-master/example_BiocProject_exceptions/project_config.yaml @@ -1,5 +1,5 @@ -metadata: - sample_table: sample_table.csv +pep_version: "2.0.0" 
+sample_table: sample_table.csv bioconductor: readFunName: readBedFilesExceptions diff --git a/inst/extdata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R b/inst/extdata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R index 8d36abe..8233726 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R +++ b/inst/extdata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R @@ -2,8 +2,8 @@ readBedFilesExceptions = function(project) { warning("first test warning") warning("second test warning") stop("test error") - paths = pepr::samples(project)$file_path - sampleNames = pepr::samples(project)$sample_name + paths = pepr::sampleTable(project)$file_path + sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) result = lapply(paths, function(x){ df = read.table(x) diff --git a/inst/extdata/example_peps-master/example_BiocProject_remote/project_config.yaml b/inst/extdata/example_peps-master/example_BiocProject_remote/project_config.yaml index 533262a..0adacab 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_remote/project_config.yaml +++ b/inst/extdata/example_peps-master/example_BiocProject_remote/project_config.yaml @@ -1,5 +1,5 @@ -metadata: - sample_table: sample_table.csv +pep_version: "2.0.0" +sample_table: sample_table.csv bioconductor: readFunName: readRemoteData diff --git a/inst/extdata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml b/inst/extdata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml index 6c1bbe8..b64a05d 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml +++ b/inst/extdata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml @@ -1,5 +1,5 @@ -metadata: - sample_table: sample_table.csv +pep_version: "2.0.0" +sample_table: sample_table.csv bioconductor: read_fun_name: 
readRemoteData_resize diff --git a/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData.R b/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData.R index d79206d..6e9b91b 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData.R +++ b/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData.R @@ -1,6 +1,6 @@ readRemoteData = function(project) { # get the data from the Project object - url = pepr::samples(project)$remote_url[[1]] + url = pepr::sampleTable(project)$remote_url[[1]] # download the file bfc = BiocFileCache::BiocFileCache(cache=tempdir(),ask=FALSE) path = BiocFileCache::bfcrpath(bfc, url) diff --git a/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R b/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R index 1cf749f..c197193 100644 --- a/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R +++ b/inst/extdata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R @@ -1,6 +1,6 @@ readRemoteData_resize = function(project, resize.width) { # get the data from the Project config - url = pepr::samples(project)$remote_url[[1]] + url = pepr::sampleTable(project)$remote_url[[1]] # download the file bfc = BiocFileCache::BiocFileCache(cache=tempdir(),ask=FALSE) path = BiocFileCache::bfcrpath(bfc, url) diff --git a/inst/test_projects/faulty_project/project_config_no_function.yaml b/inst/test_projects/faulty_project/project_config_no_function.yaml index ca8a498..27a06ac 100644 --- a/inst/test_projects/faulty_project/project_config_no_function.yaml +++ b/inst/test_projects/faulty_project/project_config_no_function.yaml @@ -1,7 +1,7 @@ # This is a faulty PEP. 
# The readFunName and readFunPath values point to function that does not exist -metadata: - sample_table: sample_table.csv +pep_version: 2.0.0 +sample_table: sample_table.csv bioconductor: readFunName: readBedFiles_missing diff --git a/inst/test_projects/faulty_project/project_config_no_section.yaml b/inst/test_projects/faulty_project/project_config_no_section.yaml index 0518a46..afd0824 100644 --- a/inst/test_projects/faulty_project/project_config_no_section.yaml +++ b/inst/test_projects/faulty_project/project_config_no_section.yaml @@ -1,4 +1,4 @@ # This is a faulty PEP. # The bioconductor section does not exist -metadata: - sample_table: sample_table.csv +pep_version: 2.0.0 +sample_table: sample_table.csv \ No newline at end of file diff --git a/inst/test_projects/faulty_project/readBedFiles.R b/inst/test_projects/faulty_project/readBedFiles.R index 363c5c0..57eaf19 100644 --- a/inst/test_projects/faulty_project/readBedFiles.R +++ b/inst/test_projects/faulty_project/readBedFiles.R @@ -1,7 +1,7 @@ readBedFiles = function(project) { cwd = getwd() - paths = pepr::samples(project)$file_path - sampleNames = pepr::samples(project)$sample_name + paths = pepr::sampleTable(project)$file_path + sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) result = lapply(paths, function(x){ df = read.table(x) diff --git a/long_vignettes/.gitignore b/long_vignettes/.gitignore new file mode 100644 index 0000000..0c123e4 --- /dev/null +++ b/long_vignettes/.gitignore @@ -0,0 +1,4 @@ +tximeta_pep +*.sqlite +*.tar.gz +*.html \ No newline at end of file diff --git a/long_vignettes/render-long-vignettes.R b/long_vignettes/render-long-vignettes.R new file mode 100644 index 0000000..3f26a72 --- /dev/null +++ b/long_vignettes/render-long-vignettes.R @@ -0,0 +1,3 @@ +knitr::opts_knit$set(base.dir = 'vignettes/', progress = TRUE, verbose = TRUE) +knitr::opts_chunk$set(fig.path="vignette6tximeta/") +knitr::knit("long_vignettes/vignette6tximeta.Rmd", 
"vignettes/vignette6tximeta.Rmd") diff --git a/long_vignettes/vignette6tximeta.Rmd b/long_vignettes/vignette6tximeta.Rmd new file mode 100644 index 0000000..5549f25 --- /dev/null +++ b/long_vignettes/vignette6tximeta.Rmd @@ -0,0 +1,164 @@ +--- +title: "Using BiocProject with tximeta" +author: "Michał Stolarczyk" +date: "`r Sys.Date()`" +output: BiocStyle::html_document +vignette: > + %\VignetteIndexEntry{Using BiocProject with tximeta"} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- +```{r, echo=FALSE} +knitr::opts_chunk$set(collapse=FALSE, message=FALSE) +``` + +# Introduction + +## Prerequisites + +This vignette demonstrates how to integrate BiocProject with the [tximeta Bioconductor package](https://www.bioconductor.org/packages/release/bioc/html/tximeta.html) for a really slick start-to-finish analysis of RNA-seq data. We assume you're familiar with BiocProject; if not, please start with [Getting started with `BiocProject` vignette](./vignette1getStarted.html) for basic instructions. + +## Introduction to Tximeta + +Tximeta is a package that imports transcript quantification files from the [salmon](https://salmon.readthedocs.io/en/latest/salmon.html) transcript quantifier. When importing, tximeta automatically annotates the data with the transcriptome used. How it works is that `salmon` records a unique identifier of the transcriptome it uses during quantification; then, tximeta reads this identifier and looks up metadata about those sequences using a local database of known transcriptome identifiers. For more details, refer to the [tximeta GitHub repository](https://github.com/mikelove/tximeta) or [publication in PLoS Computational Biology](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007664). + +The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. 
The `tximeta` function reads the `*.sa` files and returns a single `SummarizedExperiment` object with the Salmon-generated metadata in the object `metadata` slot. + +Since `SummarizedExperiment` inherits from the Bioconductor `Annotated` class, it fits perfectly into `BiocProject` output object class requirements. + + +```{r} +suppressPackageStartupMessages(library(BiocProject)) +suppressPackageStartupMessages(library(SummarizedExperiment)) +is(SummarizedExperiment(), "Annotated") +``` + +## Advantages of using BiocProject with tximeta + +If we add BiocProject into the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with a PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. The advantages of calling `tximeta` within `BiocProject` include: + + - project portability, inherent to projects following PEP specification + - single source of metadata from start of the analysis to finish -- all the PEP-defined metadata will be propagated to the output object of the `tximeta` function automatically. It will be accessible from within your R session using the [pepr](http://code.databio.org/pepr/) API, or with `@PEP` in the `metadata` slot of the `SummarizedExperiment` object, just as any other metadata attached to the result by `tximeta` function. + +Let's show you how this works with a simple demo. 
+ +# Demo of the BiocProject + tximeta workflow + +## Download example data + +First, let's download some RNA-seq counts from salmon, described in PEP format: + +```{r, download-data, collapse=TRUE, comment=" "} +if (basename(getwd()) != "long_vignettes") setwd("long_vignettes") +pth = BiocFileCache::bfcrpath( + BiocFileCache::BiocFileCache(getwd()), + "http://big.databio.org/example_data/tximeta_pep.tar.gz" + ) +utils::untar(tarfile=pth) +abs_pep_path = file.path(getwd(), "tximeta_pep") +abs_cfg_path = file.path(abs_pep_path, "project_config.yaml") +``` + +Let's take a look at what we have here... + +## Examine and load the PEP into R + +The `BiocProject` + `tximeta` workflow requires a PEP. The example we just downloaded looks like this: + +```{r, warning=FALSE, echo=FALSE, message=FALSE, collapse=TRUE, comment=" "} +library(pepr) +.printNestedList(yaml::read_yaml(abs_cfg_path)) +``` + +As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. For this vignette, we need to set the variable to the output directory where our downloaded results are stored: + +```{r} +Sys.setenv("TXIMPORTDATA"=file.path(abs_pep_path, "/tximportData")) +``` + +```{r eval=TRUE, include=FALSE} +# Run some stuff we need for the vignette +p=Project(abs_cfg_path) +``` + +Now, look at the `sample_table` key in the configuration file. It points to the second major part of a PEP: the +sample table CSV file (``r { basename(config(p)$sample_table) }``). 
Check out the contents of that file: + +```{r, echo=FALSE, message=FALSE, warning=FALSE, collapse=TRUE, comment=" "} +library(knitr) +coldataDF = read.table(p@config$sample_table, sep=",", header=TRUE) +knitr::kable(coldataDF, format = "html") +``` + +This sample table lacks the `files` column required by tximeta -- but this file is sufficient, since BiocProject, or more specifically pepr, will take care of constructing the portable `files` sample attribute automatically via `sample_modifiers.derive`, where the config file above specifies the `files` attribute and its path. + +Now we can load the file with BiocProject... but first, a short detour + +## Detour: the magic of PEP sample modifiers + +Before we jump into using `BiocProject`, let's take a minute to demonstrate how using the PEP helps us out here. Let's read in our PEP using the generic `Project` function from `pepr`: + + +```{r} +p=Project(abs_cfg_path) +``` + +We now have our PEP project read in, and we can see what is found in the sample table: + +```{r} +sampleTable(p) +``` + +See how our sample table has now been automatically updated with the `files` attribute? *That* is the magic of the PEP sample modifiers. It's that simple. Now, let's move on to demonstrate what `BiocProject` adds. + +## The BiocProject data processing function + +If you look again at our configuration file above, you'll notice the `bioconductor` section in the configuration file, which defines a function name and R script. These specify the BiocProject data processing function, which in this case, is simply a `tximeta` call that uses the PEP-managed processed sample table as its input. 
Here's what that function looks like: + +```{r echo=FALSE, eval=TRUE, comment=""} +source(file.path(abs_pep_path, "readTximeta.R")) +get(config(p)$bioconductor$readFunName) +``` + +## Loading in the data with BiocProject + +We have everything we need: a salmon output file, a PEP that specifies a sample table and provides the `files` column, and a function that uses `tximeta` to create the final `SummarizedExperiment` object. Now, we can call the `BiocProject` function: + +```{r collapse=TRUE} +require(tximeta) +bp = BiocProject(abs_cfg_path) +``` + +The output of `BiocProject` function, the `bp` object in our case, is magical. In one object, it supports the functionality of `SummarizedExperiment`, `tximeta`, and `pepr`. Observe: + +First, it is a `RangedSummarizedExperiment`, so it supports all methods defined in `SummarizedExperiment`: + +```{r} +suppressPackageStartupMessages(library(SummarizedExperiment)) +colData(bp) +assayNames(bp) +rowRanges(bp) +``` + +Naturally, we can use tximeta methods: + +```{r collapse=TRUE} +retrieveDb(bp) +``` + +But wait, there's more! The `PEP` metadata information has been attached to the metadata as well. Let's extract the `Project` object from the result with `getProject` method: + +```{r collapse=TRUE} +getProject(bp) +``` + +You can use the `pepr` API for any R-based PEP processing tools: + +```{r collapse=TRUE} +sampleTable(bp) +config(bp) +``` + +# Conclusion + +If you format your project metadata according to the PEP specification, it will be ready to use with tximeta and the resulting object will include project-wide metadata and expose [pepr](http://code.databio.org/pepr/) API for any PEP-compatible R packages for downstream analysis. 
diff --git a/man/BiocProject.Rd b/man/BiocProject.Rd index 751a8de..e81199b 100644 --- a/man/BiocProject.Rd +++ b/man/BiocProject.Rd @@ -4,19 +4,24 @@ \alias{BiocProject} \title{Portable Encapsulated Project (PEP) for biological applications} \usage{ -BiocProject(file, subproject = NULL, autoLoad = TRUE, func = NULL, - funcArgs = NULL) +BiocProject( + file, + amendments = NULL, + autoLoad = TRUE, + func = NULL, + funcArgs = NULL +) } \arguments{ \item{file}{a character vector with a path to the PEP config file} -\item{subproject}{a character vector with a name of the subproject +\item{amendments}{a character vector with a name of the amendments to be activated} \item{autoLoad}{a logical indicating whether the data should be loaded automatically. See \code{Details} for more information.} -\item{func}{a anonymous function that reads and/or processess the data, +\item{func}{a anonymous function that reads and/or processes the data, it must take the \code{\link[pepr]{Project-class}} as an argument. See \code{Details} for more information} @@ -67,14 +72,19 @@ implementation. Consider the possibilities listed below: The custom data processing function must take the \code{\link[pepr]{Project-class}} as an argument since this object will be passed to the function by default. However, if the function requires -addtional arguments, ones can be provided with the \code{funcArgs} argument +additional arguments, ones can be provided with the \code{funcArgs} argument in the \code{\link{BiocProject}} function call. Besides, the \code{func} argument with the anonymous function may serve similar possibility. + If the \code{autoLoad} is set to \code{FALSE} the data will not be loaded and empty \code{\link[pepr]{Project-class}} object will be returned. } +\note{ +The \code{bioconductor} section can be read from the project config +file or pipeline interface. 
The former is given the priority +} \section{Further reading}{ Browse the diff --git a/man/config-Annotated-method.Rd b/man/config-Annotated-method.Rd index 8ca99c8..9403f2d 100644 --- a/man/config-Annotated-method.Rd +++ b/man/config-Annotated-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods_Annotated.R -\docType{methods} \name{config,Annotated-method} \alias{config,Annotated-method} \title{View PEP config of the object of \code{\link[pepr]{Project-class}}} @@ -19,8 +18,8 @@ the \code{\link[pepr]{Project-class}} or \code{\link[S4Vectors]{Annotated-class}} } \examples{ -projectConfig = system.file("extdata", "example_peps-master", -"example_BiocProject", "project_config.yaml", package="BiocProject") +projectConfig = system.file('extdata', 'example_peps-master', +'example_BiocProject', 'project_config.yaml', package='BiocProject') p=BiocProject(projectConfig) config(p) diff --git a/man/dot-insertPEP.Rd b/man/dot-insertPEP.Rd index 1de4d0d..94d6f37 100644 --- a/man/dot-insertPEP.Rd +++ b/man/dot-insertPEP.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/functions.R +% Please edit documentation in R/utils.R \name{.insertPEP} \alias{.insertPEP} \title{Insert a PEP metadata in a metadata slot of Annotated} @@ -29,12 +29,12 @@ as the metadata. 
} \examples{ # If the object is of class Annotated -object = S4Vectors::List(result="test") +object = S4Vectors::List(result='test') result = .insertPEP(object, pepr::Project()) metadata(result) # If the object is not of class Annotated -object1 = "test" +object1 = 'test' result1 = .insertPEP(object1, pepr::Project()) metadata(result1) } diff --git a/man/dot-setShowMethod.Rd b/man/dot-setShowMethod.Rd index b6a3789..8ea2515 100644 --- a/man/dot-setShowMethod.Rd +++ b/man/dot-setShowMethod.Rd @@ -13,13 +13,15 @@ \code{FALSE} if the function was not set } \description{ -Adds the Project objects display to the default show method of an \code{\link[S4Vectors]{Annotated-class}} +Adds the Project objects display to the default show method +of an \code{\link[S4Vectors]{Annotated-class}} } \details{ -The method is defined in the environment in which the function was called, see: \code{\link[base]{sys.parent}} +The method is defined in the environment in which the function was called, +see: \code{\link[base]{sys.parent}} } \examples{ -x = S4Vectors::List(c("so","cool")) +x = S4Vectors::List(c('so','cool')) metadata(x) = list(PEP=pepr::Project()) .setShowMethod(x) x diff --git a/man/dot-updateList.Rd b/man/dot-unionList.Rd similarity index 68% rename from man/dot-updateList.Rd rename to man/dot-unionList.Rd index 4d5b78d..5d7edfd 100644 --- a/man/dot-updateList.Rd +++ b/man/dot-unionList.Rd @@ -1,15 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\name{.updateList} -\alias{.updateList} +\name{.unionList} +\alias{.unionList} \title{Update list with another list} \usage{ -.updateList(list1, list2) +.unionList(list1, list2, combine = FALSE) } \arguments{ \item{list1}{a list to be updated} \item{list2}{a list to update with} + +\item{combine}{a logical indicating whether the elements of the second list +should replace (\code{FALSE}, default) or append to (\code{TRUE}) the +first one.} } \value{ an updated list @@ -25,6 +29,6 @@ 
preserved but the order might be lost. \examples{ list1=list(a=1,b=2) list2=list(a=1,b=1,c=3) -.updateList(list1,list2) +.unionList(list1,list2) } diff --git a/man/getProject.Rd b/man/getProject.Rd index 2d6abaf..5248416 100644 --- a/man/getProject.Rd +++ b/man/getProject.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods_Annotated.R -\docType{methods} \name{getProject} \alias{getProject} \alias{getProject,Annotated-method} @@ -23,12 +22,13 @@ This method can be used to extract the project metadata from objects of } \section{Methods (by class)}{ \itemize{ -\item \code{Annotated}: extracts \code{\link[pepr]{Project-class}} from the \code{\link[S4Vectors]{Annotated-class}} +\item \code{Annotated}: extracts \code{\link[pepr]{Project-class}} +from the \code{\link[S4Vectors]{Annotated-class}} }} \examples{ -projectConfig = system.file("extdata", "example_peps-master", -"example_BiocProject", "project_config.yaml", package="BiocProject") +projectConfig = system.file('extdata', 'example_peps-master', +'example_BiocProject', 'project_config.yaml', package='BiocProject') p=BiocProject(projectConfig) getProject(p) diff --git a/man/is-Annotated-method.Rd b/man/is-Annotated-method.Rd index 0661bc6..fc95fbf 100644 --- a/man/is-Annotated-method.Rd +++ b/man/is-Annotated-method.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods_Annotated.R -\docType{methods} \name{is,Annotated-method} \alias{is,Annotated-method} \title{Is an Object from a Class?} @@ -18,14 +17,14 @@ a logical \description{ Functions to test inheritance relationships between an object and a class or between two classes. It uses the generic is function but overrides its -behavior for obejcts of class \code{\link[S4Vectors]{Annotated-class}} when +behavior for objects of class \code{\link[S4Vectors]{Annotated-class}} when testing for inheritance from \code{\link[pepr]{Project-class}} class. 
} \details{ see the \code{\link[methods]{is}} for more details } \examples{ -object = S4Vectors::List(test="test") -is(object,"Annotated") +object = S4Vectors::List(test='test') +is(object,'Annotated') } diff --git a/man/samples-Annotated-method.Rd b/man/sampleTable-Annotated-method.Rd similarity index 65% rename from man/samples-Annotated-method.Rd rename to man/sampleTable-Annotated-method.Rd index 3ff83f5..2657be0 100644 --- a/man/samples-Annotated-method.Rd +++ b/man/sampleTable-Annotated-method.Rd @@ -1,11 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods_Annotated.R -\docType{methods} -\name{samples,Annotated-method} -\alias{samples,Annotated-method} +\name{sampleTable,Annotated-method} +\alias{sampleTable,Annotated-method} \title{View samples in the objects of \code{\link[pepr]{Project-class}}} \usage{ -\S4method{samples}{Annotated}(object) +\S4method{sampleTable}{Annotated}(object) } \arguments{ \item{object}{an object of \code{\link[pepr]{Project-class}}} @@ -19,8 +18,8 @@ of the \code{\link[pepr]{Project-class}} or \code{\link[S4Vectors]{Annotated-class}} } \examples{ -projectConfig = system.file("extdata", "example_peps-master", -"example_BiocProject", "project_config.yaml", package="BiocProject") +projectConfig = system.file('extdata', 'example_peps-master', +'example_BiocProject', 'project_config.yaml', package='BiocProject') p=BiocProject(projectConfig) -samples(p) +sampleTable(p) } diff --git a/tests/testthat/test_all.R b/tests/testthat/test_all.R index bdb8588..6923546 100644 --- a/tests/testthat/test_all.R +++ b/tests/testthat/test_all.R @@ -49,22 +49,21 @@ c=function(arg) { testChar = "a" # Test -------------------------------------------------------------------- -context("Test .updateList utility function") +context("Test .unionList utility function") -test_that(".updateList returns correct object type", { - expect_is(.updateList(list(a=1),list(a=2,b=2)), 'list') +test_that(".unionList returns correct 
object type", { + expect_is(.unionList(list(a=1),list(a=2,b=2)), 'list') }) -test_that(".updateList returns list of correct length", { - expect_equal(length(.updateList(list(a=1),list(a=2,b=2))), 2) - expect_equal(length(.updateList(list(a=1,c=3),list(a=2,b=2))), 3) - expect_equal(length(.updateList(list(a=1,b=3),list(c=2,d=2))), 4) +test_that(".unionList returns list of correct length", { + expect_equal(length(.unionList(list(a=1),list(a=2,b=2))), 2) + expect_equal(length(.unionList(list(a=1,c=3),list(a=2,b=2))), 3) + expect_equal(length(.unionList(list(a=1,b=3),list(c=2,d=2))), 4) }) -test_that(".updateList throws errors", { - expect_error(.updateList(list(a=1),2)) +test_that(".unionList throws errors", { + expect_error(.unionList(list(a=1),2)) }) - context("Test .makeAbsPath utility function") test_that(".makeAbsPath returns correct object", { @@ -153,29 +152,29 @@ test_that("BiocProject function works with arguments", { test_that("BiocProject function returns Annotated when provided objects of different class and thorows a warning", { - expect_warning(expect_is(BiocProject(configFile, func = function(x){ - return("test") - }),"Annotated")) -}) + expect_warning(expect_is(BiocProject(configFile, func = function(x){ + return("test") + }),"Annotated")) + }) test_that("BiocProject function returns a Project object when autoload is set to FALSE", { - expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") -}) + expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") + }) test_that("BiocProject function throws errors/warnings when the arguments are inappropriate", { - expect_error(BiocProject(file=configFile,func = "2")) - expect_error(BiocProject(file = "test")) - expect_error(BiocProject(file = configFile,autoLoad = "test")) -}) + expect_error(BiocProject(file=configFile,func = "2")) + expect_error(BiocProject(file = "test")) + expect_error(BiocProject(file = configFile,autoLoad = "test")) + }) test_that("BiocProject function catches errors in 
the user-provided function returns the error message as Annotated", { - expect_is(BiocProject(file=configFile,func=function(x) { - stop("test") - }),"Annotated") -}) + expect_is(BiocProject(file=configFile,func=function(x) { + stop("test") + }),"Annotated") + }) test_that("BiocProject function catches errors when the function specified does not exist", { @@ -184,13 +183,13 @@ test_that("BiocProject function catches errors when the function specified test_that("BiocProject function throws a warning and returns a Project object when no bioconductor section found",{ - expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) -}) + expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) + }) context("Test Annotated methods") test_that("samples returns a correct object", { - expect_is(samples(bp),"data.table") + expect_is(sampleTable(bp),"data.table") }) test_that("config returns a correct object", { diff --git a/update_examples.sh b/update_examples.sh index 5cd7055..3f7f85b 100755 --- a/update_examples.sh +++ b/update_examples.sh @@ -1,10 +1,5 @@ wget https://github.com/pepkit/example_peps/archive/master.zip unzip master.zip -rm -rf inst/extdata/example_peps-master -mkdir -p inst/extdata/example_peps-master/example_BiocProject -mkdir -p inst/extdata/example_peps-master/example_BiocProject_remote -mv example_peps-master/example_BiocProject inst/extdata/example_peps-master -mv example_peps-master/example_BiocProject_remote inst/extdata/example_peps-master -mv example_peps-master/example_BiocProject_exceptions inst/extdata/example_peps-master -rm -rf example_peps-master +rm -rf inst/extdata/example_peps-master +mv example_peps-master inst/extdata rm master.zip \ No newline at end of file diff --git a/vignettes/vignette1getStarted.Rmd b/vignettes/vignette1getStarted.Rmd index 3d44bc0..de53d08 100644 --- a/vignettes/vignette1getStarted.Rmd +++ b/vignettes/vignette1getStarted.Rmd @@ -1,6 +1,6 @@ --- title: "Getting started with BiocProject" -author: 
"Michal Stolarczyk" +author: "Michał Stolarczyk" date: "`r Sys.Date()`" output: BiocStyle::html_document vignette: > @@ -18,13 +18,24 @@ knitr::opts_chunk$set( # Introduction -`BiocProject` is a (pending) [Bioconductor](https://www.bioconductor.org/) package that provides a way to use Portable Encapsulated Projects (PEPs) within Bioconductor framework. +`BiocProject` is a (pending) [Bioconductor](https://www.bioconductor.org/) +package that provides a way to use Portable Encapsulated Projects (PEPs) within +Bioconductor framework. -This vignette assumes you are already familiar with PEPs. If not, see [pepkit.github.io](https://pepkit.github.io/) to learn more about PEP, and the [pepr documentation](http://code.databio.org/pepr/) to learn more about reading PEPs in `R`. +This vignette assumes you are already familiar with PEPs. +If not, see [pep.databio.org](https://pep.databio.org/) to learn more about +PEP, and the [pepr documentation](http://code.databio.org/pepr/) to learn more +about reading PEPs in `R`. -`BiocProject` uses objects of [`Project` class](http://code.databio.org/pepr/reference/Project-class.html) (from `pepr`) to handle your project metadata, and allows you to provide a data loading/processing function so that you can load both project metadata and data for an entire project with a **single line of `R` code**. +`BiocProject` uses objects of [`Project` class](http://code.databio.org/pepr/reference/Project-class.html) (from `pepr`) +to handle your project metadata, and allows you to provide a data +loading/processing function so that you can load both project metadata and data +for an entire project with a **single line of `R` code**. -The output of the `BiocProject` function is the object that your function returns, but enriched with the PEP in its `metadata` slot. 
**This way of metadata storage is uniform across all objects within Bioconductor project (see: `?Annotated-class` for details).** +The output of the `BiocProject` function is the object that your function +returns, but enriched with the PEP in its `metadata` slot. **This way of +metadata storage is uniform across all objects within Bioconductor project +(see: `?Annotated-class` for details).** # Installation @@ -44,9 +55,13 @@ devtools::install_github(repo='pepkit/BiocProject') ## Introduction to PEP components -In order to use the `BiocProject` package, you first need a PEP. For this vignette, we have included a basic example PEP within the package, but if you like, you can [create your own](https://pepkit.github.io/), or download [an example PEP](https://pepkit.github.io/docs/simple_example/). +In order to use the `BiocProject` package, you first need a PEP. For this +vignette, we have included a basic example PEP within the package, but if you +like, you can [create your own](https://pepkit.github.io/), or download +[an example PEP](https://pepkit.github.io/docs/simple_example/). -The central component of a PEP is the project configuration file. Let's load up `BiocProject` and grab the path to our example configuration file: +The central component of a PEP is the project configuration file. Let's load +up `BiocProject` and grab the path to our example configuration file: ```{r echo=TRUE, message=FALSE} library(BiocProject) @@ -81,7 +96,9 @@ library(pepr) .printNestedList(yaml::read_yaml(configFile)) ``` -This configuration file points to the second major part of a PEP: the sample annotation CSV file (``r { basename(config(bp)$metadata$sample_table) }``). Here are the contents of that file: +This configuration file points to the second major part of a PEP: the +sample annotation CSV file (``r { basename(config(bp)$sample_table) }``). 
+Here are the contents of that file: ```{r, echo=FALSE, message=FALSE, warning=FALSE, collapse=TRUE, comment=" "} library(knitr) @@ -96,9 +113,14 @@ sampleAnnotationDF = read.table(sampleAnnotation, sep=",", header=TRUE) knitr::kable(sampleAnnotationDF, format = "html") ``` -In this example, our PEP has two samples, which have two attributes: `sample_name`, and `file_path`, which points the location for the data. +In this example, our PEP has two samples, which have two attributes: +`sample_name`, and `file_path`, which points to the location of the data. -The configuration file also points to a third file (``r { basename(config(bp)$bioconductor$readFunPath) }``). This file holds a single `R` function called ``r { basename(config(bp)$bioconductor$readFunName) }``, which has these contents: +The configuration file also points to a third +file (``r { basename(config(bp)$bioconductor$readFunPath) }``). This file holds +a single `R` function called +``r { basename(config(bp)$bioconductor$readFunName) }``, which has these +contents: ```{r echo=FALSE, eval=TRUE, comment=""} get(config(bp)$bioconductor$readFunName) @@ -106,7 +128,8 @@ get(config(bp)$bioconductor$readFunName) And that's all there is to it! **This PEP consists really of 3 components:** -1. the project configuration file (which points to an annotation sheet and specifies your function name) +1. the project configuration file (which points to an annotation sheet and +specifies your function name) 1. the annotation sheet 1. an R file that holds a function that knows how to process this data. @@ -114,26 +137,34 @@ With that, we're ready to see how `BiocProject` works. 
## How to use the `BiocProject` function -With a PEP in hand, it takes only a single line of code to do all the magic with `BiocProject`: +With a PEP in hand, it takes only a single line of code to do all the magic +with `BiocProject`: ```{r} bp = BiocProject(file=configFile) ``` -This loads the project metadata from the PEP, then loads and calls the actual data processing function, and returns the R object that the data processing function produces, but enriched with the PEP metadata. Consequently, the object contains all your project metadata and data! Let's inspect the it: +This loads the project metadata from the PEP, then loads and calls the actual +data processing function, and returns the R object that the data processing +function produces, but enriched with the PEP metadata. Consequently, the object +contains all your project metadata and data! Let's inspect it: ```{r} bp ``` -Since the data processing function returned `GenomicRanges::GRangesList` object, the final result of the `BiocProject` function is an object of the same class. +Since the data processing function returned a `GenomicRanges::GRangesList` +object, the final result of the `BiocProject` function is an object of the +same class. ## How to interact with the returned object -The created object provides all the `pepr::Project` methods (which you can find in the [reference documentation for pepr](http://code.databio.org/pepr/reference/index.html)). +The created object provides all the `pepr::Project` methods (which you can +find in the reference +[documentation](http://code.databio.org/pepr/reference/index.html) for pepr). 
```{r} -samples(bp) +sampleTable(bp) config(bp) ``` @@ -146,7 +177,8 @@ getProject(bp) # How to provide a data load function -In the basic case the function name (and path to source file, if necessary) is specified in the YAML config file itself, like: +In the basic case the function name (and path to source file, if necessary) +is specified in the YAML config file itself, like: ``` bioconductor: @@ -161,12 +193,14 @@ bioconductor: readFunPath: /path/to/the/file.R ``` -The function specified can be a data processing function of any complexity, but has to follow 3 rules listed below. +The function specified can be a data processing function of any complexity, +but has to follow 3 rules listed below. ### Rules: 1. must take at least a single argument, -1. the argument must be a [`pepr::Project`](http://code.databio.org/pepr/reference/Project-class.html) object (should use that input to load all the relevant data into `R`), +1. the argument must be a `pepr::Project` object (should use that input to +load all the relevant data into `R`), 1. must return an object of class that extends the class `Annotated`. Listed below are some of the classes that extend the class `Annotated`: @@ -176,7 +210,8 @@ showClass("Annotated") ``` -Consider the [readBedFiles function](https://github.com/pepkit/example_peps/blob/master/example_BiocProject/readBedFiles.R) as an example of a function that can be used with `BiocProject` package: +Consider the `readBedFiles` function as an example of a function that can be used +with the `BiocProject` package: ```{r echo=FALSE, eval=TRUE, comment=""} processFunction = system.file( @@ -191,7 +226,9 @@ readBedFiles ``` # Data reading function error/warning handling -The `BiocProject` function provides a way to rigorously monitor exceptions related to your data reading function. 
All the produced warnings and errors are caught, processed and displayed in an organized way: +The `BiocProject` function provides a way to rigorously monitor exceptions +related to your data reading function. All the produced warnings and errors +are caught, processed and displayed in an organized way: ```{r} configFile = system.file( @@ -204,7 +241,8 @@ configFile = system.file( bpExceptions = BiocProject(configFile) ``` -As indicated in the warning messages above -- no data is being returned. Instead a `S4Vectors::List` with a PEP is its `metadata` slot is produced. +As indicated in the warning messages above -- no data is being returned. +Instead a `S4Vectors::List` with a PEP in its `metadata` slot is produced. ```{r} bpExceptions @@ -212,11 +250,16 @@ bpExceptions # Further reading -See this [More arguments than just a PEP in your function?](./vignette2multipleArguments.html) vignette if you want to: +See the ["More arguments than just a PEP in your function?"](./vignette2multipleArguments.html) vignette if you want to: * use an anonymous function instead of one defined *a priori* * use a function that requires more arguments than just a PEP -See the [Working with remote data](./vignette4remoteData.html) vignette to learn how to download the data from the Internet, process it and store it conveniently with related metadata in any object from the Bioconductor project. +See the ["Working with remote data"](./vignette4remoteData.html) vignette to +learn how to download the data from the Internet, process it and store it +conveniently with related metadata in any object from the Bioconductor project. -See the [Working with large datasets - simpleCache](./vignette3simpleCache.html) vignette to learn how the `simpleCache` R package can be used to prevent copious and lengthy results recalculations when working with large datasets. 
\ No newline at end of file +See the +["Working with large datasets - simpleCache"](./vignette3simpleCache.html) +vignette to learn how the `simpleCache` R package can be used to prevent +copious and lengthy results recalculations when working with large datasets. \ No newline at end of file diff --git a/vignettes/vignette2multipleArguments.Rmd b/vignettes/vignette2multipleArguments.Rmd index c92ac64..7e8fb67 100644 --- a/vignettes/vignette2multipleArguments.Rmd +++ b/vignettes/vignette2multipleArguments.Rmd @@ -1,6 +1,6 @@ --- title: "More arguments than just a PEP in your function?" -author: "Michal Stolarczyk" +author: "Michał Stolarczyk" date: "`r Sys.Date()`" output: BiocStyle::html_document vignette: > @@ -18,7 +18,9 @@ knitr::opts_chunk$set( # Introduction -Before you start see the [Getting started with `BiocProject` vignette](./vignette1getStarted.html) for the basic information and installation instructions. +Before you start see the +[Getting started with `BiocProject` vignette](./vignette1getStarted.html) for +the basic information and installation instructions. Get paths to the files used in this vignette ```{r echo=T,message=FALSE} @@ -44,7 +46,8 @@ readBedFiles_resize = system.file( **What if your custom data processing function requires more arguments than just a PEP?** -For reference consider the `readBedFiles_resize.R` function and its interface. This function additionally requires the `resize.width` argument. +For reference consider the `readBedFiles_resize.R` function and its interface. +This function additionally requires the `resize.width` argument. ```{r include=FALSE, eval=TRUE} processFunction = system.file( "extdata", @@ -58,7 +61,10 @@ source(processFunction) ```{r echo=FALSE, comment=""} readBedFiles_resize ``` -There are a few ways to enable your function to get multiple arguments - not just a PEP ([`pepr::Project`](http://code.databio.org/pepr/reference/Project-class.html)) object, which is the basic scenario. 
+There are a few ways to enable your function to get multiple +arguments - not just a PEP +([`pepr::Project`](http://code.databio.org/pepr/reference/Project-class.html)) +object, which is the basic scenario. **The options:** @@ -68,7 +74,9 @@ There are a few ways to enable your function to get multiple arguments - not jus # How to provide addtional section in the config file -The easiest way to provide addtional arguments to your data reading/processing function is to add addtional section in the config file. See the config file below for reference: +The easiest way to provide addtional arguments to your data reading/processing +function is to add addtional section in the config file. See the config file +below for reference: ```{r, warning=FALSE, echo=FALSE, message=FALSE, collapse=TRUE, comment=" "} library(pepr) @@ -91,10 +99,14 @@ bp # How to use the `funcArgs` argument -Provide additional `funcArgs` argument to the `BiocProject` function. This argument has to be a named list. The names have to correspond to the argument names of your function. **The PEP (`pepr::Project` object) will be passed to your function by default**. For example: +Provide additional `funcArgs` argument to the `BiocProject` function. +This argument has to be a named list. The names have to correspond to the +argument names of your function. +**The PEP will be passed to your function by default**. For example: -Read the function into R environment and run the `BiocProject` function with the `funcArgs` argument +Read the function into R environment and run the `BiocProject` function with +the `funcArgs` argument ```{r include=F,eval=TRUE} library(BiocProject) ProjectConfigArgs = system.file( @@ -118,12 +130,18 @@ source(readBedFiles_resize) bpArgs = BiocProject(file=ProjectConfigArgs, funcArgs=list(resize.width=200)) bpArgs ``` -The `funcArgs` argument gets a one element list and passes the `resize.width` argument to your custom data processing function. 
If any arguments are present in the config file, they will be overwritten (the width of the ranges has changed from 100 to 200 in the example above). +The `funcArgs` argument gets a one element list and passes the `resize.width` +argument to your custom data processing function. If any arguments are present +in the config file, they will be overwritten (the width of the ranges has +changed from 100 to 200 in the example above). # How to use an anonymous function -You can use an [anonymous function](https://en.wikipedia.org/wiki/Anonymous_function) (that is implemented in the `BiocProject` function call) to provide additional arguments to your function of interest. For example: +You can use an +[anonymous function](https://en.wikipedia.org/wiki/Anonymous_function) +(that is implemented in the `BiocProject` function call) to provide additional +arguments to your function of interest. For example: ```{r} bpAnonymous = BiocProject(file=ProjectConfigArgs, func=function(x){ diff --git a/vignettes/vignette3simpleCache.Rmd b/vignettes/vignette3simpleCache.Rmd index fb0edba..c566b9d 100644 --- a/vignettes/vignette3simpleCache.Rmd +++ b/vignettes/vignette3simpleCache.Rmd @@ -1,6 +1,6 @@ --- title: "Working with large datasets - simpleCache" -author: "Michal Stolarczyk" +author: "Michał Stolarczyk" date: "`r Sys.Date()`" output: BiocStyle::html_document vignette: > @@ -18,17 +18,27 @@ knitr::opts_chunk$set( # Introduction -This vignette assumes you're familiar with the [Getting started with `BiocProject` vignette](./vignette1getStarted.html) for the basic `BiocProject` information and [An introduction to `simpleCache`](http://code.databio.org/simpleCache/articles/simpleCacheIntroduction.html) for the basic `simpleCache` information. 
+This vignette assumes you're familiar with the +["Getting started with `BiocProject` vignette"](./vignette1getStarted.html) for +the basic `BiocProject` information +and ["An introduction to `simpleCache`"](http://code.databio.org/simpleCache/articles/simpleCacheIntroduction.html) for the basic `simpleCache` information. # Why to use `simpleCache` with `BiocProject` -For a large project, it can take substantial computational effort to run the initial data loading function that will load your data into R. We'd like to cache that result so that it doesn't have to be reprocessed every time we want to load our project metadata and data. Pairing `simpleCache` with `BiocProject` allows us to do just that. This means that if your custom data processing function loads or processes large data sets that take a long time, the `R` object will not be recalculated, but simply reloaded. +For a large project, it can take substantial computational effort to run the +initial data loading function that will load your data into R. We'd like to +cache that result so that it doesn't have to be reprocessed every time we want +to load our project metadata and data. Pairing `simpleCache` with `BiocProject` +allows us to do just that. This means that if your custom data processing +function loads or processes large data sets that take a long time, the `R` +object will not be recalculated, but simply reloaded. **Briefly, this is the `simpleCache` logic: ** * if the object exists in memory already: do nothing, * if it does not exist in memory, but exists on disk: load it into memory, -* if it exists neither in memory or on disk: create it and store it to disk and memory. +* if it exists neither in memory or on disk: create it and store it +to disk and memory. 
# How to use `simpleCache` with `BiocProject` @@ -52,18 +62,24 @@ setCacheDir(tempdir()) simpleCache("dataSet1", { BiocProject(file = projectConfig) }) ``` -This loads your PEP and its data with `BiocProject`, and then caches the result with `simpleCache`. Say you rerun this line of code. `simpleCache` prevents the calculations from rerunning since the `dataSet1` object is already present in the memory: +This loads your PEP and its data with `BiocProject`, and then caches the +result with `simpleCache`. Say you rerun this line of code. `simpleCache` +prevents the calculations from rerunning since the `dataSet1` object is +already present in the memory: ```{r} simpleCache("dataSet1", { BiocProject(file = projectConfig) }) ``` -Say you come back to your analysis after a while and the `dataSet1` object is not in the memory (simulated by removing it with `rm()` function here). `simpleCache` loads the object from the directory you have specified in `setCacheDir()`. +Say you come back to your analysis after a while and the `dataSet1` object is +not in the memory (simulated by removing it with `rm()` function here). +`simpleCache` loads the object from the directory you have specified +in `setCacheDir()`. ```{r} rm(dataSet1) simpleCache("dataSet1", { BiocProject(file = projectConfig) }) ``` -And that's it! In the simplest case this is all you need to **organize, read, process your data and prevent from copious results recalculations** every time you come back to your project. - - +And that's it! In the simplest case this is all you need to **organize, +read, process your data and prevent from copious results recalculations** +every time you come back to your project. 
\ No newline at end of file diff --git a/vignettes/vignette4remoteData.Rmd b/vignettes/vignette4remoteData.Rmd index e4270d9..b2321cf 100644 --- a/vignettes/vignette4remoteData.Rmd +++ b/vignettes/vignette4remoteData.Rmd @@ -1,6 +1,6 @@ --- title: "Working with remote data" -author: "Michal Stolarczyk" +author: "Michał Stolarczyk" date: "`r Sys.Date()`" output: BiocStyle::html_document vignette: > @@ -18,11 +18,14 @@ knitr::opts_chunk$set( # Introduction -Before you start see the [Getting started with `BiocProject` vignette](./vignette1getStarted.html) for the basic information and installation instructions. +Before you start see the +[Getting started with `BiocProject` vignette](./vignette1getStarted.html) for +the basic information and installation instructions. # How to download the data with your function -There is *no limit* to the data processing function complexity. For example, the function can retrieve the data from a remote source and then process it. +There is *no limit* to the data processing function complexity. For example, +the function can retrieve the data from a remote source and then process it. ```{r echo=FALSE, eval=TRUE} processFunction = system.file( @@ -82,7 +85,8 @@ ProjectConfigRemote = system.file( package = "BiocProject" ) ``` -Run the `BiocProject` function. Creates an object returned with the data processing function with a PEP in its `metadata` slot: +Run the `BiocProject` function. 
Creates an object returned with the data +processing function with a PEP in its `metadata` slot: ```{r} bpRemote = BiocProject(file=ProjectConfigRemote) ``` @@ -100,6 +104,6 @@ bpRemote And the metadata ```{r} metadata(bpRemote) -samples(bpRemote) +sampleTable(bpRemote) config(bpRemote) ``` \ No newline at end of file diff --git a/vignettes/vignette6tximeta.Rmd b/vignettes/vignette6tximeta.Rmd new file mode 100644 index 0000000..8d6726e --- /dev/null +++ b/vignettes/vignette6tximeta.Rmd @@ -0,0 +1,309 @@ +--- +title: "Using BiocProject with tximeta" +author: "Michał Stolarczyk" +date: "2020-10-30" +output: BiocStyle::html_document +vignette: > + %\VignetteIndexEntry{Using BiocProject with tximeta} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + +# Introduction + +## Prerequisites + +This vignette demonstrates how to integrate BiocProject with the [tximeta Bioconductor package](https://www.bioconductor.org/packages/release/bioc/html/tximeta.html) for a really slick start-to-finish analysis of RNA-seq data. We assume you're familiar with BiocProject; if not, please start with [Getting started with `BiocProject` vignette](./vignette1getStarted.html) for basic instructions. + +## Introduction to Tximeta + +Tximeta is a package that imports transcript quantification files from the [salmon](https://salmon.readthedocs.io/en/latest/salmon.html) transcript quantifier. When importing, tximeta automatically annotates the data with the transcriptome used. How it works is that `salmon` records a unique identifier of the transcriptome it uses during quantification; then, tximeta reads this identifier and looks up metadata about those sequences using a local database of known transcriptome identifiers. For more details, refer to the [tximeta GitHub repository](https://github.com/mikelove/tximeta) or [publication in PLoS Computational Biology](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007664). 
+ +The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. The `tximeta` function reads the `*.sf` files and returns a single `SummarizedExperiment` object with the Salmon-generated metadata in the object `metadata` slot. + +Since `SummarizedExperiment` inherits from the Bioconductor `Annotated` class, it fits perfectly into `BiocProject` output object class requirements. + + + +```r +suppressPackageStartupMessages(library(BiocProject)) +suppressPackageStartupMessages(library(SummarizedExperiment)) +is(SummarizedExperiment(), "Annotated") +``` + +``` +## [1] TRUE +``` + +## Advantages of using BiocProject with tximeta + +If we add BiocProject into the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with a PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had the `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. The advantages of calling `tximeta` within `BiocProject` include: + + - project portability, inherent to projects following PEP specification + - single source of metadata from start of the analysis to finish -- all the PEP-defined metadata will be propagated to the output object of the `tximeta` function automatically. It will be accessible from within your R session using the [pepr](http://code.databio.org/pepr/) API, or with `@PEP` in the `metadata` slot of the `SummarizedExperiment` object, just as any other metadata attached to the result by `tximeta` function. 
+ +Let's show you how this works with a simple demo. + +# Demo of the BiocProject + tximeta workflow + +## Download example data + +First, let's download some RNA-seq counts from salmon, described in PEP format: + + +```r +if (basename(getwd()) != "long_vignettes") setwd("long_vignettes") +pth = BiocFileCache::bfcrpath( + BiocFileCache::BiocFileCache(getwd()), + "http://big.databio.org/example_data/tximeta_pep.tar.gz" + ) +utils::untar(tarfile=pth) +abs_pep_path = file.path(getwd(), "tximeta_pep") +abs_cfg_path = file.path(abs_pep_path, "project_config.yaml") +``` + +Let's take a look at what we have here... + +## Examine and load the PEP into R + +The `BiocProject` + `tximeta` workflow requires a PEP. The example we just downloaded looks like this: + + +``` + pep_version: 2.0.0 + sample_table: sample_table.csv + sample_modifiers: + append: + files: FILE_PATH_PLACEHOLDER + derive: + attributes: files + sources: + FILE_PATH_PLACEHOLDER: $TXIMPORTDATA/salmon_dm/{names}/quant.sf + bioconductor: + readFunName: readTximeta + readFunPath: readTximeta.R +``` + +As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. For this vignette, we need to set the variable to the output directory where our downloaded results are stored: + + +```r +Sys.setenv("TXIMPORTDATA"=file.path(abs_pep_path, "/tximportData")) +``` + + + +Now, look at the `sample_table` key in the configuration file. It points to the second major part of a PEP: the +sample table CSV file (`sample_table.csv`). Check out the contents of that file: + + + + + + + + + + + + + + +
names condition
SRR1197474 A
+ +This sample table lacks the `files` column required by tximeta -- but this file is sufficient, since BiocProject, or more specifically pepr, will take care of constructing the portable `files` sample attribute automatically via `sample_modifiers.derive`, where the config file above specifies the `files` attribute and its path. + +Now we can load the file with BiocProject... but first, a short detour + +## Detour: the magic of PEP sample modifiers + +Before we jump into using `BiocProject`, let's take a minute to demonstrate how using the PEP helps us out here. Let's read in our PEP using the generic `Project` function from `pepr`: + + + +```r +p=Project(abs_cfg_path) +``` + +We now have our PEP project read in, and we can see what is found in the sample table: + + +```r +sampleTable(p) +``` + +``` +## names condition +## 1: SRR1197474 A +## files +## 1: /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/tximportData/salmon_dm/SRR1197474/quant.sf +``` + +See how our sample table has now been automatically updated with the `files` attribute? *That* is the magic of the PEP sample modifiers. It's that simple. Now, let's move on to demonstrate what `BiocProject` adds. + +## The BiocProject data processing function + +If you look again at our configuration file above, you'll notice the `bioconductor` section in the configuration file, which defines a function name and R script. These specify the BiocProject data processing function, which in this case, is simply a `tximeta` call that uses the PEP-managed processed sample table as its input. Here's what that function looks like: + + +``` +function(pep) { + require(tximeta) + return(tximeta::tximeta(pep@samples)) +} +``` + +## Loading in the data with BiocProject + +We have everything we need: a salmon output file, a PEP that specifies a sample table and provides the `files` column, and a function that uses `tximeta` to create the final `SummarizedExperiment` object. 
Now, we can call `BiocProject` function: + + +```r +require(tximeta) +bp = BiocProject(abs_cfg_path) +``` + +The output of `BiocProject` function, the `bp` object in our case, is magical. In one object, it supports the functionality of `SummarizedExperiment`, `tximeta`, and `pepr`. Observe: + +### The BiocProject output supports SummarizedExperiment functions + +It is a `RangedSummarizedExperiment`, so it supports all methods defined in SummarizedExperiment package: + + +```r +suppressPackageStartupMessages(library(SummarizedExperiment)) +colData(bp) +``` + +``` +## DataFrame with 1 row and 2 columns +## names condition +## +## SRR1197474 SRR1197474 A +``` + +```r +assayNames(bp) +``` + +``` +## [1] "counts" "abundance" "length" +``` + +```r +rowRanges(bp) +``` + +``` +## GRanges object with 33706 ranges and 9 metadata columns: +## seqnames ranges strand | tx_id tx_biotype +## | +## FBtr0070129 X 656673-657899 + | FBtr0070129 protein_coding +## FBtr0070126 X 656356-657899 + | FBtr0070126 protein_coding +## FBtr0070128 X 656673-657899 + | FBtr0070128 protein_coding +## FBtr0070124 X 656114-657899 + | FBtr0070124 protein_coding +## FBtr0070127 X 656356-657899 + | FBtr0070127 protein_coding +## ... ... ... ... . ... ... +## FBtr0114299 2R 21325218-21325323 + | FBtr0114299 snoRNA +## FBtr0113582 3R 5598638-5598777 - | FBtr0113582 snoRNA +## FBtr0091635 3L 1488906-1489045 + | FBtr0091635 snoRNA +## FBtr0113599 3L 261803-261953 - | FBtr0113599 snoRNA +## FBtr0113600 3L 831870-832008 - | FBtr0113600 snoRNA +## tx_cds_seq_start tx_cds_seq_end gene_id tx_support_level +## +## FBtr0070129 657110 657595 FBgn0025637 +## FBtr0070126 657110 657595 FBgn0025637 +## FBtr0070128 657110 657595 FBgn0025637 +## FBtr0070124 657110 657595 FBgn0025637 +## FBtr0070127 657110 657595 FBgn0025637 +## ... ... ... ... ... 
+## FBtr0114299 FBgn0086023 +## FBtr0113582 FBgn0082989 +## FBtr0091635 FBgn0086670 +## FBtr0113599 FBgn0083014 +## FBtr0113600 FBgn0083057 +## tx_id_version gc_content tx_name +## +## FBtr0070129 FBtr0070129 44.7641 FBtr0070129 +## FBtr0070126 FBtr0070126 44.8128 FBtr0070126 +## FBtr0070128 FBtr0070128 44.7974 FBtr0070128 +## FBtr0070124 FBtr0070124 43.8859 FBtr0070124 +## FBtr0070127 FBtr0070127 44.8571 FBtr0070127 +## ... ... ... ... +## FBtr0114299 FBtr0114299 35.8491 FBtr0114299 +## FBtr0113582 FBtr0113582 32.8571 FBtr0113582 +## FBtr0091635 FBtr0091635 45.0000 FBtr0091635 +## FBtr0113599 FBtr0113599 48.3444 FBtr0113599 +## FBtr0113600 FBtr0113600 44.6043 FBtr0113600 +## ------- +## seqinfo: 25 sequences (1 circular) from BDGP6.22 genome +``` + +Naturally, we can use tximeta methods: + + +```r +retrieveDb(bp) +## EnsDb for Ensembl: +## |Backend: SQLite +## |Db type: EnsDb +## |Type of Gene ID: Ensembl Gene ID +## |Supporting package: ensembldb +## |Db created by: ensembldb package from Bioconductor +## |script_version: 0.3.5 +## |Creation time: Tue Nov 19 08:33:44 2019 +## |ensembl_version: 98 +## |ensembl_host: localhost +## |Organism: Drosophila melanogaster +## |taxonomy_id: 7227 +## |genome_build: BDGP6.22 +## |DBSCHEMAVERSION: 2.1 +## | No. of genes: 17753. +## | No. of transcripts: 34802. +## |Protein data available. +``` + +But wait, there's more! The `PEP` metadata information has been attached to the metadata as well. Let's extract the `Project` object from the result with the `getProject` method: + + +```r +getProject(bp) +## PEP project object. Class: Project +## file: +## /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/project_config.yaml +## samples: 1 +``` + +You can use the `pepr` API for any R-based PEP processing tools: + + +```r +sampleTable(bp) +## names condition +## 1: SRR1197474 A +## files +## 1: /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/tximportData/salmon_dm/SRR1197474/quant.sf +config(bp) +## Config object.
Class: Config +## pep_version: 2.0.0 +## sample_table: +## /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/sample_table.csv +## sample_modifiers: +## append: +## files: FILE_PATH_PLACEHOLDER +## derive: +## attributes: files +## sources: +## FILE_PATH_PLACEHOLDER: +## /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/tximportData/salmon_dm/{names}/quant.sf +## bioconductor: +## readFunName: readTximeta +## readFunPath: +## /home/nsheff/code/BiocProject/long_vignettes/tximeta_pep/readTximeta.R +## name: tximeta_pep +``` + +# Conclusion + +If you format your project metadata according to the PEP specification, it will be ready to use with tximeta, and the resulting object will include project-wide metadata and expose the [pepr](http://code.databio.org/pepr/) API for any PEP-compatible R packages for downstream analysis.