Skip to content

Commit

Permalink
Merge pull request #2196 from nsoranzo/deseq2_gff3
Browse files Browse the repository at this point in the history
 Fix DESeq2 with tximport from GFF3 annotation
  • Loading branch information
mvdbeek authored Dec 4, 2018
2 parents 600e99f + 2ce7a84 commit 5b6dc96
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 52 deletions.
2 changes: 1 addition & 1 deletion tools/deseq2/deseq2.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ spec <- matrix(c(
"plots" , "p", 1, "character",
"tximport", "i", 0, "logical",
"txtype", "y", 1, "character",
"tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
"tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file
"esf", "e", 1, "character",
"fit_type", "t", 1, "integer",
"many_contrasts", "m", 0, "logical",
Expand Down
69 changes: 49 additions & 20 deletions tools/deseq2/deseq2.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
<tool id="deseq2" name="DESeq2" version="2.11.40.3">
<tool id="deseq2" name="DESeq2" version="2.11.40.4">
<description>Determines differentially expressed features from count tables</description>
<requirements>
<requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>
<requirement type="package" version="1.6.0">bioconductor-tximport</requirement>
<requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement>
<requirement type="package" version="0.6.5">r-ggrepel</requirement>
<requirement type="package" version="1.0.8">r-pheatmap</requirement>
<requirement type="package" version="1.20.0">bioconductor-deseq2</requirement>
<!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
<requirement type="package" version="2.24.0">bioconductor-rhdf5</requirement>
<requirement type="package" version="1.8.0">bioconductor-tximport</requirement>
<requirement type="package" version="1.32.3">bioconductor-genomicfeatures</requirement>
<requirement type="package" version="1.20.2">r-getopt</requirement>
<requirement type="package" version="0.8.0">r-ggrepel</requirement>
<requirement type="package" version="3.0.1">r-gplots</requirement>
<requirement type="package" version="1.0.10">r-pheatmap</requirement>
<requirement type="package" version="0.2.20">r-rjson</requirement>
</requirements>
<stdio>
<regex match="Execution halted"
Expand All @@ -27,7 +32,7 @@ echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla
<command><![CDATA[
#if $tximport.tximport_selector == 'tximport':
#if $tximport.mapping_format.mapping_format_selector == 'gtf':
ln -s '$tximport.mapping_format.gtf_file' mapping.gtf &&
ln -s '$tximport.mapping_format.gtf_file' mapping.gff &&
#else:
ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
#end if
Expand Down Expand Up @@ -92,7 +97,7 @@ Rscript '${__tool_directory__}/deseq2.R'
-i
-y $tximport.txtype
#if $tximport.mapping_format.mapping_format_selector == 'gtf':
-x mapping.gtf
-x mapping.gff
#else:
-x mapping.txt
#end if
Expand Down Expand Up @@ -133,14 +138,14 @@ Rscript '${__tool_directory__}/deseq2.R'
</param>
<conditional name="mapping_format">
<param name="mapping_format_selector" type="select" label="Gene mapping format">
<option value="gtf" selected="True">GTF</option>
<option value="tabular">Transcript-ID and Gene-ID mapping file</option>
<option value="gtf" selected="True">GTF/GFF3</option>
<option value="tabular">Transcript-ID to Gene-ID mapping file</option>
</param>
<when value="gtf">
<param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 file with Transcript - Gene mapping"/>
<param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 annotation file"/>
</when>
<when value="tabular">
<param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/>
<param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript-ID to Gene-ID mapping"/>
</when>
</conditional>
</when>
Expand Down Expand Up @@ -190,7 +195,7 @@ Rscript '${__tool_directory__}/deseq2.R'
help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
</inputs>
<outputs>
<data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}">
<data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}">
<filter>many_contrasts is False</filter>
<actions>
<action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
Expand All @@ -200,16 +205,16 @@ Rscript '${__tool_directory__}/deseq2.R'
<filter>many_contrasts is True</filter>
<discover_datasets pattern="None.(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
</collection>
<data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
<data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
<filter>pdf == True</filter>
</data>
<data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">
<data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
<filter>normCounts == True</filter>
</data>
<data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}">
<data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}">
<filter>normRLog == True</filter>
</data>
<data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}">
<data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}">
<filter>normVST == True</filter>
</data>
</outputs>
Expand Down Expand Up @@ -251,7 +256,7 @@ Rscript '${__tool_directory__}/deseq2.R'
</output>
<output name="deseq_out" >
<assert_contents>
<has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
<has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
</assert_contents>
</output>
</test>
Expand Down Expand Up @@ -315,7 +320,7 @@ Rscript '${__tool_directory__}/deseq2.R'
</output>
<output name="deseq_out" >
<assert_contents>
<has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
<has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
</assert_contents>
</output>
</test>
Expand All @@ -339,7 +344,31 @@ Rscript '${__tool_directory__}/deseq2.R'
<param name="tabular_file" value="tx2gene.tab"/>
<output name="deseq_out" >
<assert_contents>
<has_text_matching expression="MIR6859-2\t1.1858.*\t-1.5832.*\t1.2956.*\t-1.2219.*\t0.2217.*\t0.8868.*" />
<has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
</assert_contents>
</output>
</test>
<!--Ensure Sailfish/Salmon input with GFF3 annotation works-->
<test expect_num_outputs="1">
<repeat name="rep_factorName">
<param name="factorName" value="Treatment"/>
<repeat name="rep_factorLevel">
<param name="factorLevel" value="Treated"/>
<param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
</repeat>
<repeat name="rep_factorLevel">
<param name="factorLevel" value="Untreated"/>
<param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
</repeat>
</repeat>
<param name="pdf" value="False"/>
<param name="tximport_selector" value="tximport"/>
<param name="txtype" value="sailfish"/>
<param name="mapping_format_selector" value="gtf"/>
<param name="gtf_file" value="GRCh38_latest_genomic.gff"/>
<output name="deseq_out" >
<assert_contents>
<has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
</assert_contents>
</output>
</test>
Expand Down
36 changes: 20 additions & 16 deletions tools/deseq2/get_deseq_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txty
}

if (!is.null(tximport)) {
if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
if (tolower(file_ext(opt$tx2gene)) == "gtf") {
gtfFile <-tx2gene
if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
if (tolower(file_ext(opt$tx2gene)) == "gff") {
gffFile <-tx2gene
} else {
gtfFile <- NULL
gffFile <- NULL
tx2gene <- read.table(tx2gene, header=FALSE)
}
useTXI <- TRUE
Expand Down Expand Up @@ -45,22 +45,26 @@ get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txty

} else {
# construct the object using tximport
# first need to make the tx2gene table
# this takes ~2-3 minutes using Bioconductor functions
if (!is.null(gtfFile)) {
suppressPackageStartupMessages({
library("GenomicFeatures")
})
txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
k <- keys(txdb, keytype = "GENEID")
df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
tx2gene <- df[, 2:1] # tx ID, then gene ID
}
library("tximport")
txiFiles <- as.character(sampleTable$filename)
labs <- row.names(sampleTable)
names(txiFiles) <- labs
txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
if (!is.null(gffFile)) {
# first need to make the tx2gene table
# this takes ~2-3 minutes using Bioconductor functions
suppressPackageStartupMessages({
library("GenomicFeatures")
})
txdb <- makeTxDbFromGFF(gffFile)
k <- keys(txdb, keytype = "TXNAME")
tx2gene <- select(txdb, k, "GENEID", "TXNAME")
}
try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
if (!exists("txi")) {
# Remove version from transcript IDs
tx2gene$TXNAME <- sub('\\.[0-9]+', '', tx2gene$TXNAME)
txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
}
dds <- DESeqDataSetFromTximport(txi,
subset(sampleTable, select=-c(filename)),
designFormula)
Expand Down
Loading

0 comments on commit 5b6dc96

Please sign in to comment.