From 2de7232f6e0eee2837c4e393c92f60b85002bead Mon Sep 17 00:00:00 2001 From: Kevin Rue-Albrecht Date: Thu, 4 Jul 2024 16:41:38 +0100 Subject: [PATCH] rescue parseCSQToGRanges from ensemblVEP (#12) * bump x.y.z version to even y prior to creation of RELEASE_3_14 branch * bump x.y.z version to odd y following creation of RELEASE_3_14 branch * bump x.y.z version to even y prior to creation of RELEASE_3_15 branch * bump x.y.z version to odd y following creation of RELEASE_3_15 branch * bump x.y.z version to even y prior to creation of RELEASE_3_16 branch * bump x.y.z version to odd y following creation of RELEASE_3_16 branch * bump x.y.z version to even y prior to creation of RELEASE_3_17 branch * bump x.y.z version to odd y following creation of RELEASE_3_17 branch * bump x.y.z version to even y prior to creation of RELEASE_3_18 branch * bump x.y.z version to odd y following creation of RELEASE_3_18 branch * bump x.y.z version to even y prior to creation of RELEASE_3_19 branch * bump x.y.z version to odd y following creation of RELEASE_3_19 branch * rescue parseCSQToGRanges from ensemblVEP * fix import, export, and documentation * remove github action --------- Co-authored-by: Nitesh Turaga Co-authored-by: J Wokaty Co-authored-by: J Wokaty --- .github/workflows/build_check_deploy.yaml | 112 ---------------------- .github/workflows/check-bioc.yml | 50 +++++----- DESCRIPTION | 50 +++++++--- NAMESPACE | 13 ++- R/AllGenerics.R | 7 ++ R/parseCSQToGRanges.R | 41 ++++++++ inst/NEWS.Rd | 8 ++ man/VcfBasicRules-class.Rd | 60 +++++------- man/VcfFilterRules-class.Rd | 34 +++---- man/parseCSQToGRanges.Rd | 95 ++++++++++++++++++ man/vepInPhenoLevel-methods.Rd | 3 +- vignettes/VcfFilterRules.Rmd | 2 +- 12 files changed, 270 insertions(+), 205 deletions(-) delete mode 100644 .github/workflows/build_check_deploy.yaml create mode 100644 R/parseCSQToGRanges.R create mode 100644 man/parseCSQToGRanges.Rd diff --git a/.github/workflows/build_check_deploy.yaml 
b/.github/workflows/build_check_deploy.yaml deleted file mode 100644 index 9a4e055..0000000 --- a/.github/workflows/build_check_deploy.yaml +++ /dev/null @@ -1,112 +0,0 @@ -on: - push: - pull_request: - branches: - - main - -name: build_check_deploy - -jobs: - build_check_deploy: - runs-on: ubuntu-latest - container: bioconductor/bioconductor_docker:devel - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - NOT_CRAN: true - TZ: UTC - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - shell: Rscript {0} - - # This lets us augment with additional dependencies - - name: Install system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - echo "$sysreqs" - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE, repos = BiocManager::repositories(), Ncpu = 2L) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Install BiocCheck - run: | - BiocManager::install("BiocCheck") - shell: Rscript {0} - - - name: BiocCheck - run: | - BiocCheck::BiocCheck(".") - shell: Rscript {0} - - - name: R CMD check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual"), error_on = "error", check_dir = "check") - shell: Rscript {0} - - - name: Show testthat output - if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@master - with: - name: check-results - path: check - - - name: Test coverage - run: | - install.packages("covr") - covr::codecov(token = 
"${{secrets.CODECOV_TOKEN}}") - shell: Rscript {0} - - - name: Build pkgdown - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - run: | - PATH=$PATH:$HOME/bin/ Rscript -e 'BiocManager::install("pkgdown"); pkgdown::build_site(".")' - - # deploy needs rsync? Seems so. - - name: Install deploy dependencies - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - run: | - apt-get update && apt-get -y install rsync - - - name: Deploy 🚀 - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: JamesIves/github-pages-deploy-action@releases/v3 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BRANCH: gh-pages # The branch the action should deploy to. - FOLDER: docs # The folder the action should deploy. - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - name: Publish to Registry - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: elgohr/Publish-Docker-Github-Action@master - with: - name: docker.pkg.github.com/${{ github.repository }}/TVTB:latest - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - registry: docker.pkg.github.com diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index d08d705..9d67b4d 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -52,9 +52,9 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.1', bioc: '3.14', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - - { os: macOS-latest, r: '4.1', bioc: '3.14'} - ##- { os: windows-latest, r: '4.1', bioc: '3.14'} + - { os: ubuntu-latest, r: 'devel', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" } + - { os: macOS-latest, r: 'devel', 
bioc: '3.20'} + # - { os: windows-latest, r: 'devel', bioc: '3.20'} ## Check https://github.com/r-lib/actions/tree/master/examples ## for examples using the http-user-agent env: @@ -79,12 +79,12 @@ jobs: ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml ## If they update their steps, we will also need to update ours. - name: Checkout Repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 ## R is already included in the Bioconductor docker images - name: Setup R from r-lib if: runner.os != 'Linux' - uses: r-lib/actions/setup-r@master + uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} @@ -92,7 +92,7 @@ jobs: ## pandoc is already included in the Bioconductor docker images - name: Setup pandoc from r-lib if: runner.os != 'Linux' - uses: r-lib/actions/setup-pandoc@master + uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | @@ -102,19 +102,19 @@ jobs: - name: Restore R package cache if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.1- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel- - name: Cache R packages on Linux if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/work/_temp/Library - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os 
}}-biocversion-devel-r-4.1- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-devel- - name: Install Linux system dependencies if: runner.os == 'Linux' @@ -143,6 +143,9 @@ jobs: ## Required for tcltk brew install xquartz --cask + ## Required for terra + brew install gdal + - name: Install Windows system dependencies if: runner.os == 'Windows' run: | @@ -176,7 +179,7 @@ jobs: gha_repos <- if( .Platform$OS.type == "unix" && Sys.info()["sysname"] != "Darwin" ) c( - "AnVIL" = "https://bioconductordocker.blob.core.windows.net/packages/3.14/bioc", + "AnVIL" = "https://bioconductordocker.blob.core.windows.net/packages/3.20/bioc", BiocManager::repositories() ) else BiocManager::repositories() @@ -210,15 +213,15 @@ jobs: shell: Rscript {0} - name: Install covr - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux' run: | remotes::install_cran("covr") shell: Rscript {0} - name: Install pkgdown - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: | - remotes::install_github("r-lib/pkgdown") + remotes::install_cran("pkgdown") shell: Rscript {0} - name: Session info @@ -259,27 +262,28 @@ jobs: run: | BiocCheck::BiocCheck( dir('check', 'tar.gz$', full.names = TRUE), - `quit-with-status` = TRUE, + `quit-with-status` = FALSE, `no-check-R-ver` = TRUE, `no-check-bioc-help` = TRUE ) shell: Rscript {0} - name: Test coverage - if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux' run: | covr::codecov() shell: Rscript {0} - name: Install package - if: github.ref == 
'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: R CMD INSTALL . - name: Build and deploy pkgdown site - if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + if: github.ref == 'refs/heads/devel' && env.run_pkgdown == 'true' && runner.os == 'Linux' run: | - git config --local user.name "$GITHUB_ACTOR" - git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git config --global user.name "$GITHUB_ACTOR" + git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" + git config --global --add safe.directory /__w/kevinrue/TVTB Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)" shell: bash {0} ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) @@ -291,7 +295,7 @@ jobs: if: failure() uses: actions/upload-artifact@master with: - name: ${{ runner.os }}-biocversion-devel-r-4.1-results + name: ${{ runner.os }}-biocversion-devel-r-devel-results path: check - uses: docker/build-push-action@v1 diff --git a/DESCRIPTION b/DESCRIPTION index 55ff596..04ad1fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: TVTB Type: Package Title: TVTB: The VCF Tool Box -Version: 1.19.1 -Date: 2021-08-30 +Version: 1.31.1 +Date: 2024-07-04 Authors@R: person("Kevin", "Rue-Albrecht", role = c("aut", "cre"), email = "kevinrue67@gmail.com") Description: The package provides S4 classes and methods to filter, @@ -14,20 +14,47 @@ Description: The package provides S4 classes and methods to filter, Shiny Variant Explorer (tSVE). 
License: Artistic-2.0 Depends: R (>= 3.4), methods, utils, stats -Imports: AnnotationFilter, BiocGenerics (>= 0.25.1), BiocParallel, Biostrings, - ensembldb, ensemblVEP, GenomeInfoDb, GenomicRanges, GGally, ggplot2, Gviz, - limma, IRanges (>= 2.21.6), reshape2, Rsamtools, S4Vectors (>= 0.25.14), - SummarizedExperiment, VariantAnnotation (>= 1.19.9) +Imports: + AnnotationFilter, + BiocGenerics (>= 0.25.1), + BiocParallel, + Biostrings, + ensembldb, + GenomeInfoDb, + GenomicRanges, + GGally, + ggplot2, + Gviz, + limma, + IRanges (>= 2.21.6), + reshape2, + Rsamtools, + S4Vectors (>= 0.25.14), + SummarizedExperiment, + VariantAnnotation (>= 1.19.9) Suggests: EnsDb.Hsapiens.v75 (>= 0.99.7), shiny (>= 0.13.2.9005), DT (>= 0.1.67), rtracklayer, BiocStyle (>= 2.5.19), knitr (>= 1.12), rmarkdown, testthat, covr, pander -biocViews: Software, Genetics, GeneticVariability, GenomicVariation, - DataRepresentation, GUI, Genetics, DNASeq, WholeGenome, - Visualization, MultipleComparison, DataImport, - VariantAnnotation, Sequencing, Coverage, Alignment, +biocViews: + Software, + Genetics, + GeneticVariability, + GenomicVariation, + DataRepresentation, + GUI, + Genetics, + DNASeq, + WholeGenome, + Visualization, + MultipleComparison, + DataImport, + VariantAnnotation, + Sequencing, + Coverage, + Alignment, SequenceMatching Collate: utils.R tSVE.R AllClasses.R AllGenerics.R Genotypes-class.R - TVTBparam-class.R VcfFilterRules-class.R countGenos-methods.R + TVTBparam-class.R VcfFilterRules-class.R parseCSQToGRanges.R countGenos-methods.R autodetectGenotypes.R addCountGenos-methods.R addFrequencies-methods.R addOverallFrequencies-methods.R addPhenoLevelFrequencies-methods.R dropInfo.R readVcf-methods.R @@ -36,4 +63,3 @@ Collate: utils.R tSVE.R AllClasses.R AllGenerics.R Genotypes-class.R VignetteBuilder: knitr URL: https://github.com/kevinrue/TVTB BugReports: https://github.com/kevinrue/TVTB/issues -RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 97fc3fd..ca00a06 100644 --- 
a/NAMESPACE +++ b/NAMESPACE @@ -39,7 +39,7 @@ importFrom( importFrom( "stats", - "as.formula" + "as.formula", "setNames" ) # When Source and Version will be possible to add to VCF INFO fields @@ -71,8 +71,6 @@ importFrom( "vcfSamples<-", "vcfWhich<-", "vcfFixed<-", "vcfInfo<-", "vcfGeno<-" ) -importFrom("ensemblVEP", "parseCSQToGRanges") - importFrom( "ggplot2", "ggplot", "aes_string", "facet_wrap", @@ -96,6 +94,12 @@ importFrom( "reshape2", "melt", "dcast") +importFrom( + "GenomeInfoDb", + "genome", + "genome<-" +) + #exportPattern("^[[:alpha:]]+") # runShinyApp.R ---- @@ -142,7 +146,8 @@ exportClasses( # Default methods export( "TVTBparam", "Genotypes", - "VcfFixedRules", "VcfInfoRules", "VcfVepRules", "VcfFilterRules" + "VcfFixedRules", "VcfInfoRules", "VcfVepRules", "VcfFilterRules", + "parseCSQToGRanges" ) # Re-export useful methods defined in other packages ---- diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 0f2391f..e6a0d12 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -187,3 +187,10 @@ setGeneric( function(x, value) standardGeneric("svp<-") ) + +# ensemblVEP ---- + +setGeneric("parseCSQToGRanges", signature = "x", + function(x, VCFRowID=TRUE, ...) 
+ standardGeneric("parseCSQToGRanges") +) diff --git a/R/parseCSQToGRanges.R b/R/parseCSQToGRanges.R new file mode 100644 index 0000000..b16ed6d --- /dev/null +++ b/R/parseCSQToGRanges.R @@ -0,0 +1,41 @@ +setMethod("parseCSQToGRanges", "VCF", + function(x, VCFRowID=character(), ..., info.key="CSQ") + { + ## no 'info.key' + if (!info.key %in% names(info(x))) + return(rowRanges(x)) + + hdr <- info(header(x))[info.key, "Description"] + nms <- unlist(strsplit(strsplit(hdr, "Format: ")[[1]][2], "\\|")) + ulst <- unlist(info(x)[[info.key]], use.names=FALSE) + ## 'info.key' without data + if (all(is.na(ulst))) { + gr <- rowRanges(x) + csq <- + DataFrame(setNames(replicate(length(nms), character(0)), nms)) + } else { + ## 'info.key' with data + elt <- elementNROWS(info(x)[[info.key]]) + raw <- strsplit(ulst, "\\|") + csq <- matrix(nrow=length(ulst), ncol=length(nms)) + for (i in 1:nrow(csq)) + csq[i, 1:length(raw[[i]])] <- raw[[i]] + csq[!nzchar(csq)] <- NA + colnames(csq) <- nms + csq <- data.frame(csq, stringsAsFactors=FALSE) + + rd <- rowRanges(x) + gr <- rd[rep(seq_along(rd), elt)] + if (length(VCFRowID)) { + if (any(no_match <- !VCFRowID %in% rownames(x))) + warning(paste0("rownames not found in 'x' : ", + paste(VCFRowID[no_match], collapse=","))) + VCFRowID <- rep(match(rownames(x), VCFRowID), elt) + csq <- DataFrame(VCFRowID=VCFRowID, csq) + } + } + mcols(gr) <- csq + genome(gr) <- genome(x) + gr + } +) diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index fa51df6..0f9ddde 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -1,6 +1,14 @@ \name{TVTB-NEWS} \title{NEWS: The VCF Tool Box} +\section{Changes in version 1.31.1 2024-07-04}{ +\subsection{Bug fix}{ +\itemize{ +\item Copy \code{parseCSQToGRanges} from \code{ensemblVEP} +} +} +} + \section{Changes in version 1.19.1 2021-08-30}{ \subsection{Bug fix}{ \itemize{ diff --git a/man/VcfBasicRules-class.Rd b/man/VcfBasicRules-class.Rd index 2a87f4c..97542aa 100644 --- a/man/VcfBasicRules-class.Rd +++ b/man/VcfBasicRules-class.Rd 
@@ -109,7 +109,6 @@ decribed in this help page, except when specified otherwise. } \section{Constructors}{ -\describe{}{ \code{VcfFixedRules(exprs = list(), ..., active = TRUE)} \code{VcfInfoRules(exprs = list(), ..., active = TRUE)} @@ -123,33 +122,24 @@ which is recycled as necessary. See the constructor of \code{FilterRules} for more details. } -} \section{Subsetting and Replacement}{ In the following code snippets \code{x} and \code{value} are objects from any of the classes described in this help page. -\describe{ - \item{}{ - \code{x[i]}: Subsets the filter rules using the same interface as for - \code{\linkS4class{List}}. - } - \item{}{ - \code{x[[i]]}: Extracts an expression or function via the same interface - as for \code{\linkS4class{List}}. - } - \item{}{ - \code{x[i] <- value}: Replaces a filter rule by one of the - \strong{same} class. - The active state(s) and name(s) are transferred from \code{value} to - \code{x}. - } - \item{}{ - \code{x[[i]] <- value}: - The same interface as for \code{\linkS4class{List}}. - The default active state for new rules is TRUE. - } -} +\itemize{ + \item{\code{x[i]}: Subsets the filter rules using the same interface as for + \code{\linkS4class{List}}.} + \item{\code{x[[i]]}: Extracts an expression or function via the same interface + as for \code{\linkS4class{List}}.} + \item{\code{x[i] <- value}: Replaces a filter rule by one of the + \strong{same} class. + The active state(s) and name(s) are transferred from \code{value} to + \code{x}.} + \item{\code{x[[i]] <- value}: + The same interface as for \code{\linkS4class{List}}. + The default active state for new rules is TRUE.} + } } \section{Combining}{ @@ -157,15 +147,15 @@ In the following code snippets \code{x}, \code{values}, and \code{...} are objects from any of the classes described in this help page, or \code{VcfFilterRules}. -\describe{ - \item{}{ - \code{append(x, values, after = length(x))}: - Appends the values onto \code{x} at the index given by \code{after}. 
- } - \item{}{ - \code{c(x, ...,)}: - Concatenates the filters objects in \code{...} onto the end of \code{x}. - } +\itemize{ + \item{ + \code{append(x, values, after = length(x))}: + Appends the values onto \code{x} at the index given by \code{after}. + } + \item{ + \code{c(x, ...,)}: + Concatenates the filters objects in \code{...} onto the end of \code{x}. + } } Note that combining rules of different types @@ -175,8 +165,8 @@ produces a \code{VcfFilterRules} object. \section{Evaluating}{ As described in the \code{S4Vectors} documentation: -\describe{ -\item{}{ +\itemize{ +\item{ \code{eval(expr, envir, enclos)}: Evaluates a rule instance (passed as the \code{expr} argument) in their respective context of a \code{VCF} object @@ -190,7 +180,7 @@ As described in the \code{S4Vectors} documentation: \item{\strong{\code{FilterRules}}: \code{envir}} } } -\item{}{ +\item{ \code{evalSeparately(expr, envir, enclos)}: \code{subsetByFilter(x, filter)} diff --git a/man/VcfFilterRules-class.Rd b/man/VcfFilterRules-class.Rd index 9144b19..35c5958 100644 --- a/man/VcfFilterRules-class.Rd +++ b/man/VcfFilterRules-class.Rd @@ -70,19 +70,21 @@ In the following code snippets \code{x} is a \code{VcfFilterRules} object. } \section{Constructors}{ -\describe{}{ -\code{VcfFilterRules(...)} -constructs an \code{VcfFilterRules} object from -\code{VcfFixedRules}, \code{VcfInfoRules}, \code{VcfVepRules}, -and \code{VcfFilterRules} objects in \code{...}. +\itemize{ +\item{ + \code{VcfFilterRules(...)} + constructs an \code{VcfFilterRules} object from + \code{VcfFixedRules}, \code{VcfInfoRules}, \code{VcfVepRules}, + and \code{VcfFilterRules} objects in \code{...}. +} } } \section{Subsetting and Replacement}{ In the code snippets below, \code{x} is a \code{VcfFilterRules} object. -\describe{ - \item{}{ +\itemize{ + \item{ \code{x[i, drop = TRUE]}: Subsets the filter rules using the same interface as for \code{\linkS4class{Vector}}. 
If all filter rules are of the same type and \code{drop=TRUE} (default), @@ -90,18 +92,18 @@ In the code snippets below, \code{x} is a \code{VcfFilterRules} object. if possible. In other words, if all remaining filter rules are of type \code{"vep"}, the object will be type as \code{VcfVepRules}. } - \item{}{ + \item{ \code{x[[i]]}: Extracts an expression or function via the same interface as for \code{\linkS4class{List}}. } - \item{}{ + \item{ \code{x[i] <- value}: Replaces a filter rule by one of any valid class (\code{VcfFixedRules}, \code{VcfInfoRules}, \code{VcfVepRules}, or \code{VcfFilterRules}). The active state(s), name(s), and type(s) (if applicable) are transferred from \code{value}. } - \item{}{ + \item{ \code{x[[i]] <- value}: The same interface as for \code{\linkS4class{List}}. The default active state for new rules is \code{TRUE}. @@ -116,12 +118,12 @@ while \code{values} and \code{...} are objects from any of the classes \code{VcfFixedRules}, \code{VcfInfoRules}, \code{VcfVepRules}, or \code{VcfFilterRules}: -\describe{ - \item{}{ +\itemize{ + \item{ \code{append(x, values, after = length(x))}: Appends the values onto \code{x} at the index given by \code{after}. } - \item{}{ + \item{ \code{c(x, ...,)}: Concatenates the filters objects in \code{...} onto the end of \code{x}. } @@ -130,15 +132,15 @@ or \code{VcfFilterRules}: \section{Evaluating}{ As described in the \code{S4Vectors} documentation: -\describe{ - \item{}{ +\itemize{ + \item{ \code{eval(expr, envir, enclos)} Evaluates each active rule in a \code{VcfFilterRules} instance (passed as the \code{expr} argument) in their respective context of a \code{VCF} object (passed as the \code{envir} argument). 
} - \item{}{ + \item{ \code{evalSeparately(expr, envir, enclos)}: \code{subsetByFilter(x, filter)} diff --git a/man/parseCSQToGRanges.Rd b/man/parseCSQToGRanges.Rd new file mode 100644 index 0000000..bd1d675 --- /dev/null +++ b/man/parseCSQToGRanges.Rd @@ -0,0 +1,95 @@ +\name{parseCSQToGRanges} + +\alias{parseCSQToGRanges} +\alias{parseCSQToGRanges,VCF-method} + +\title{ + Parse the CSQ column of a VCF object into a GRanges object +} + +\description{ + Parse the CSQ column in a VCF object returned from the Ensembl + Variant Effect Predictor (VEP). + + \strong{**This method was rescued following the deprecation of the package + \code{ensemblVEP} in the Bioconductor release \code{3.20}.**} +} + +\usage{ +\S4method{parseCSQToGRanges}{VCF}(x, VCFRowID=character(), + ..., info.key = "CSQ") +} + +\arguments{ + \item{x}{ + A \code{VCF} object. + } + \item{VCFRowID}{ + A \code{character} vector of rownames from the original VCF. + When provided, the result includes a metadata column named + \sQuote{VCFRowID} which maps the result back to the row + (variant) in the original VCF. + + When \code{VCFRowID} is not provided no \sQuote{VCFRowID} + column is included. + } + \item{info.key}{ + The name of the INFO key that VEP writes the consequences to in the output + (default is \code{CSQ}). This should only be used if something other than + \code{CSQ} was passed in the --vcf_info_field flag in the output options. + } + \item{\dots}{ + Arguments passed to other methods. Currently not used. + } +} + +\details{ + \describe{ + \item{-}{ + When \code{ensemblVEP} returns a \code{VCF} object, the consequence data + are returned unparsed in the 'CSQ' INFO column. \code{parseCSQToGRanges} + parses these data into a \code{GRanges} object that is expanded to match + the dimension of the 'CSQ' data. Because each variant can have multiple + matches, the ranges in the \code{GRanges} are repeated. 
+ + If rownames from the original VCF are provided as \code{VCFRowID} a + metadata column is included in the result that maps back to the row + (variant) in the original VCF. This option is only applicable when the + \code{info.key} field has data (is not empty). + + If no \code{info.key} column is found the function returns the data in + \code{rowRanges()}. + } + } +} + +\value{ + Returns a \code{GRanges} object with consequence data as the + metadata columns. If no 'CSQ' column is found the \code{GRanges} + from \code{rowRanges()} is returned. +} + +\author{ + Valerie Obenchain, Kevin Rue-Albrecht +} + +\references{ + Ensembl VEP Home: + \url{http://uswest.ensembl.org/info/docs/tools/vep/index.html} +} + +\examples{ + library(VariantAnnotation) + file <- system.file("extdata", "moderate.vcf", package = "TVTB") + vep <- readVcf(file) + + ## The returned 'CSQ' data are unparsed. + info(vep)$CSQ + + ## Parse into a GRanges and include the 'VCFRowID' column. + vcf <- readVcf(file, "hg19") + csq <- parseCSQToGRanges(vep, VCFRowID=rownames(vcf)) + csq[1:4] +} + +\keyword{methods} diff --git a/man/vepInPhenoLevel-methods.Rd b/man/vepInPhenoLevel-methods.Rd index 0aa4a78..76a6aa1 100644 --- a/man/vepInPhenoLevel-methods.Rd +++ b/man/vepInPhenoLevel-methods.Rd @@ -32,7 +32,7 @@ considered. } \item{vepCol}{ VEP prediction fields; \code{character} vector of metadata columns in -\code{ensemblVEP::parseCSQToGRanges(vcf)}. +\code{parseCSQToGRanges(vcf)}. } \item{unique}{ If \code{TRUE}, consider only variants unique to the phenotype level @@ -66,7 +66,6 @@ Kevin Rue-Albrecht \seealso{ \code{\linkS4class{VCF}}, -\code{\link{ensemblVEP}}, \code{\linkS4class{GRanges}}, and \code{\linkS4class{DataFrame}}. 
} diff --git a/vignettes/VcfFilterRules.Rmd b/vignettes/VcfFilterRules.Rmd index c221061..b6280d3 100644 --- a/vignettes/VcfFilterRules.Rmd +++ b/vignettes/VcfFilterRules.Rmd @@ -168,7 +168,7 @@ fixedVcf <- colnames(fixed(vcf)) fixedVcf infoVcf <- colnames(info(vcf)) infoVcf -csq <- ensemblVEP::parseCSQToGRanges(x = evcf) +csq <- parseCSQToGRanges(x = evcf) vepVcf <- colnames(mcols(csq)) vepVcf ```