Skip to content

Commit

Permalink
feat: mergevcfsandgenotype and add docker
Browse files Browse the repository at this point in the history
  • Loading branch information
dnousome committed Nov 15, 2023
1 parent a26c05a commit cd48fd1
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 139 deletions.
12 changes: 3 additions & 9 deletions docker/logan_base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,9 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4
&& /opt2/gatk-4.3.0.0/gatk --list
ENV PATH="/opt2/gatk-4.3.0.0:$PATH"

# Install last release of GATK3 (GATK/3.8-1)
# Only being used for the CombineVariants
# command that is not available in GATK4
# Available via env variable: $GATK_JAR
# Requires Java8 or 1.8
RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar"
# Install DISCVRSeq, whose MergeVcfsAndGenotypes tool replaces GATK3 CombineVariants
# Available via env variable: $DISCVRSeq_JAR
RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar
ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar"

# Install dependencies needed to add a new repository over HTTPS
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ profiles {
}
withName: 'octopus_tn|octopus_tonly' {
container = 'docker://dancooke/octopus:latest'
memory=70.GB
memory=72.GB
time=24.h
cpus=16
}
Expand Down
121 changes: 81 additions & 40 deletions workflow/modules/variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -224,17 +224,17 @@ process mutect2filter {
publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')

input:
tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
tuple val(sample), path(mutvcfs), path(stats), path(obs),
path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
output:
tuple val(sample), path("${sample}.mut2.marked.vcf.gz"),
path("${sample}.mut2.norm.vcf.gz"),
tuple val(sample),
path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"),
path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"),
path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv")

script:
//Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz
mut2in = mutvcfs.join(" -I ")


"""
gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz
gatk IndexFeatureFile -I ${sample}.concat.vcf.gz
Expand All @@ -258,12 +258,13 @@ process mutect2filter {
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.mut2.norm.vcf |\
bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz
bcftools index -t ${sample}.mut2.norm.vcf.gz
"""

stub:
"""
touch ${sample}.mut2.marked.vcf.gz
touch ${sample}.mut2.norm.vcf.gz
touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi
touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi
touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv
"""

Expand Down Expand Up @@ -395,7 +396,7 @@ process octopus_tn {


output:
tuple val(tumorname),
tuple val("${tumorname}_vs_${normalname}"),
path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")

script:
Expand Down Expand Up @@ -506,8 +507,11 @@ process combineVariants {

output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")

path("${vc}/${sample}.${vc}.marked.vcf.gz"),
path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
path("${vc}/${sample}.${vc}.norm.vcf.gz"),
path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")

script:
vcfin = inputvcf.join(" -I ")

Expand All @@ -518,13 +522,16 @@ process combineVariants {
-D $GENOMEDICT \
-I $vcfin
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
mv ${sample}.${vc}.marked.vcf.gz ${vc}
bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
"""

stub:
Expand All @@ -533,7 +540,8 @@ process combineVariants {
mkdir ${vc}
touch ${vc}/${sample}.${vc}.marked.vcf.gz
touch ${vc}/${sample}.${vc}.norm.vcf.gz
touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
"""

}
Expand All @@ -559,8 +567,7 @@ process bcftools_index_octopus {

stub:
"""
touch ${vcf}
touch ${vcf}.tbi
touch ${vcf} ${vcf}.tbi
"""

}
Expand All @@ -574,7 +581,10 @@ process combineVariants_octopus {

output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
path("${vc}/${sample}.${vc}.marked.vcf.gz"),
path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"),
path("${vc}/${sample}.${vc}.norm.vcf.gz"),
path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")

script:
vcfin = vcfs.join(" ")
Expand All @@ -583,13 +593,16 @@ process combineVariants_octopus {
mkdir ${vc}
bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
mv ${sample}.${vc}.marked.vcf.gz ${vc}
bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
"""

stub:
Expand All @@ -598,16 +611,14 @@ process combineVariants_octopus {
mkdir ${vc}
touch ${vc}/${sample}.${vc}.marked.vcf.gz
touch ${vc}/${sample}.${vc}.norm.vcf.gz
touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
"""

}






process combineVariants_strelka {
//Concat all somatic snvs/indels across all files, strelka separates snv/indels
label 'process_mid'
Expand All @@ -617,7 +628,9 @@ process combineVariants_strelka {
tuple val(sample), path(strelkasnvs), path(strelkaindels)

output:
tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz")
tuple val(sample),
path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"),
path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi")


script:
Expand All @@ -628,29 +641,72 @@ process combineVariants_strelka {

"""
bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz
bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz
bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz
bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz
bcftools index ${sample}.strelka.vcf.gz -t
bcftools index ${sample}.filtered.strelka.vcf.gz -t
"""

stub:

"""
touch ${sample}.strelka.vcf.gz
touch ${sample}.filtered.strelka.vcf.gz
touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi
touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi
"""

}

//Merge the per-caller somatic VCFs of one tumor/normal pair into a single VCF
//using DISCVRSeq MergeVcfsAndGenotypes (stand-in for GATK3 CombineVariants).
//  callers  : caller names, used as the -V:<tag> labels that --priority_list refers to
//  vcfs     : VCFs to merge, paired positionally with `callers`
//  vcfindex : indexes for `vcfs`; staged only so they sit next to the VCFs
//NOTE(review): assumes `callers` and `vcfs` are lists of equal length in the same
//order - confirm against the channel that feeds this process.
process somaticcombine {
    label 'process_mid'
    publishDir(path: "${outdir}/vcfs/combined", mode: 'copy')

    input:
        tuple val(tumorsample), val(normal),
        val(callers),
        path(vcfs), path(vcfindex)

    output:
        //NOTE(review): the .tbi is never created explicitly in the script; it is
        //presumably written by the merge tool alongside the .vcf.gz - confirm.
        tuple val(tumorsample), val(normal),
        path("${tumorsample}_combined.vcf.gz"),
        path("${tumorsample}_combined.vcf.gz.tbi")

    script:
    //Build "-V:<caller> <vcf>" for every (caller, vcf) pair
    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
    vcfin2="-V:" + vcfin1.join(" -V:")

    //The jar is not staged into the task directory, so it is resolved through the
    //DISCVRSeq_JAR environment variable exported by docker/logan_base/Dockerfile.
    //The dollar sign is escaped so the shell, not Nextflow, expands it.
    //Every option line must end in a backslash: without it the -O and -V arguments
    //are executed as a separate command and never reach the merge tool.
    """
    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
        -R $GENOMEREF \
        --genotypeMergeOption PRIORITIZE \
        --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
        -O ${tumorsample}_combined.vcf.gz \
        $vcfin2
    """

    stub:

    """
    touch ${tumorsample}_combined.vcf.gz
    touch ${tumorsample}_combined.vcf.gz.tbi
    """

}

process annotvep_tn {
publishDir(path: "${outdir}/mafs/", mode: 'copy')

input:
tuple val(tumorsample), val(normalsample),
val(vc), path(tumorvcf)
val(vc), path(tumorvcf),path(vcfindex)

output:
path("paired/${vc}/${tumorsample}.maf")
Expand Down Expand Up @@ -739,18 +795,3 @@ process combinemafs_tn {
}



/*
process combineVariants_allcallers {
publishDir(path: "${outdir}/vcfs/", mode: 'copy')
input:
tuple val(sample), path(inputvcf), val(vc)
output:
tuple val(sample),
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
}
*/
55 changes: 47 additions & 8 deletions workflow/modules/variant_calling_tonly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@ process mutect2filter_tonly {
input:
tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
output:
tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"),
path("${sample}.tonly.mut2.norm.vcf.gz"),
tuple val(sample),
path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"),
path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"),
path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv")

script:
Expand Down Expand Up @@ -217,13 +218,14 @@ process mutect2filter_tonly {
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\
sed '/^\$/d' |\
bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz
bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz
"""

stub:
"""
touch ${sample}.tonly.mut2.marked.vcf.gz
touch ${sample}.tonly.mut2.norm.vcf.gz
touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi
touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi
touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv
"""
}
Expand Down Expand Up @@ -310,33 +312,70 @@ process octopus_tonly {

output:
tuple val(tumorname),
path("${tumorname}_${bed.simpleName}.octopus.vcf.gz")
path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz")

script:

"""
octopus -R $GENOMEREF -C cancer -I ${tumor} \
--annotations AC AD DP -t ${bed} \
$SOMATIC_FOREST \
-o ${tumorname}_${bed.simpleName}.octopus.vcf.gz --threads $task.cpus
-o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus
"""

stub:

"""
touch ${tumorname}_${bed.simpleName}.octopus.vcf.gz
touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
"""
}


//Merge the per-caller tumor-only VCFs of one sample into a single VCF using
//DISCVRSeq MergeVcfsAndGenotypes (stand-in for GATK3 CombineVariants).
//  callers  : caller names, used as the -V:<tag> labels that --priority_list refers to
//  vcfs     : VCFs to merge, paired positionally with `callers`
//  vcfindex : indexes for `vcfs`; staged only so they sit next to the VCFs
//NOTE(review): assumes `callers` and `vcfs` are lists of equal length in the same
//order - confirm against the channel that feeds this process.
process somaticcombine_tonly {
    label 'process_mid'
    publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')

    input:
        tuple val(tumorsample),
        val(callers),
        path(vcfs), path(vcfindex)

    output:
        //NOTE(review): the .tbi is never created explicitly in the script; it is
        //presumably written by the merge tool alongside the .vcf.gz - confirm.
        tuple val(tumorsample),
        path("${tumorsample}_combined_tonly.vcf.gz"),
        path("${tumorsample}_combined_tonly.vcf.gz.tbi")

    script:
    //Build "-V:<caller> <vcf>" for every (caller, vcf) pair
    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
    vcfin2="-V:" + vcfin1.join(" -V:")

    //The jar is not staged into the task directory, so it is resolved through the
    //DISCVRSeq_JAR environment variable exported by docker/logan_base/Dockerfile.
    //The dollar sign is escaped so the shell, not Nextflow, expands it.
    //Every option line must end in a backslash: without it the -O and -V arguments
    //are executed as a separate command and never reach the merge tool.
    //The -O name must match the declared output (…_combined_tonly.vcf.gz).
    """
    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
        -R $GENOMEREF \
        --genotypeMergeOption PRIORITIZE \
        --priority_list mutect2,octopus,vardict,varscan \
        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
        -O ${tumorsample}_combined_tonly.vcf.gz \
        $vcfin2
    """

    stub:
    """
    touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi
    """

}

process annotvep_tonly {
publishDir("${outdir}/mafs", mode: "copy")

input:
tuple val(tumorsample),
val(vc), path(tumorvcf)
val(vc), path(tumorvcf),
path(vcfindex)


output:
Expand Down
Loading

0 comments on commit cd48fd1

Please sign in to comment.