From cd48fd1d8d71348c09258f701ddd44c49f6cca4c Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 15 Nov 2023 16:42:32 -0500 Subject: [PATCH] feat: mergevcfsandgenotype and add docker --- docker/logan_base/Dockerfile | 12 +- nextflow.config | 2 +- workflow/modules/variant_calling.nf | 121 +++++++++++++------- workflow/modules/variant_calling_tonly.nf | 55 +++++++-- workflow/modules/workflows.nf | 132 +++++++++++++--------- workflow/modules/workflows_tonly.nf | 58 +++++----- 6 files changed, 241 insertions(+), 139 deletions(-) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 3ed4fb6..55832b5 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -52,15 +52,9 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4 && /opt2/gatk-4.3.0.0/gatk --list ENV PATH="/opt2/gatk-4.3.0.0:$PATH" -# Install last release of GATK3 (GATK/3.8-1) -# Only being used for the CombineVariants -# command that is not available in GATK4 -# Available via env variable: $GATK_JAR -# Requires Java8 or 1.8 -RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ - && tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ - && rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 -ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar" +# Use DISCVRSeq For CombineVariants Replacement +RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar +ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar" # Install dependencies needed to add a new repository over HTTPS RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ diff --git a/nextflow.config b/nextflow.config index 2f094e7..d868ce3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -229,7 +229,7 @@ profiles { } withName: 'octopus_tn|octopus_tonly' { container = 'docker://dancooke/octopus:latest' - memory=70.GB + memory=72.GB time=24.h 
cpus=16 } diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index d87c33c..8354cc8 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -224,17 +224,17 @@ process mutect2filter { publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: - tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) + tuple val(sample), path(mutvcfs), path(stats), path(obs), + path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) output: - tuple val(sample), path("${sample}.mut2.marked.vcf.gz"), - path("${sample}.mut2.norm.vcf.gz"), + tuple val(sample), + path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"), + path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"), path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv") script: - //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz mut2in = mutvcfs.join(" -I ") - """ gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz gatk IndexFeatureFile -I ${sample}.concat.vcf.gz @@ -258,12 +258,13 @@ process mutect2filter { awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.mut2.norm.vcf |\ bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz + bcftools index -t ${sample}.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.mut2.marked.vcf.gz - touch ${sample}.mut2.norm.vcf.gz + touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi + touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv """ @@ -395,7 +396,7 @@ process octopus_tn { output: - tuple val(tumorname), + tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") script: @@ -506,8 +507,11 @@ process combineVariants { output: tuple val(sample), - 
path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") - + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") + script: vcfin = inputvcf.join(" -I ") @@ -518,13 +522,16 @@ process combineVariants { -D $GENOMEDICT \ -I $vcfin bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -533,7 +540,8 @@ process combineVariants { mkdir ${vc} touch ${vc}/${sample}.${vc}.marked.vcf.gz touch ${vc}/${sample}.${vc}.norm.vcf.gz - + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi """ } @@ -559,8 +567,7 @@ process bcftools_index_octopus { stub: """ - touch ${vcf} - touch ${vcf}.tbi + touch ${vcf} ${vcf}.tbi """ } @@ -574,7 +581,10 @@ process combineVariants_octopus { output: tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") script: vcfin = vcfs.join(" ") @@ -583,13 +593,16 @@ process combineVariants_octopus { mkdir ${vc} bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o 
${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -598,16 +611,14 @@ process combineVariants_octopus { mkdir ${vc} touch ${vc}/${sample}.${vc}.marked.vcf.gz touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi """ } - - - - process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels label 'process_mid' @@ -617,7 +628,9 @@ process combineVariants_strelka { tuple val(sample), path(strelkasnvs), path(strelkaindels) output: - tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz") + tuple val(sample), + path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"), + path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi") script: @@ -628,29 +641,72 @@ process combineVariants_strelka { """ bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz - bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz + bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz + + bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f 
PASS -Oz -o ${sample}.filtered.strelka.vcf.gz + bcftools index ${sample}.strelka.vcf.gz -t + bcftools index ${sample}.filtered.strelka.vcf.gz -t """ stub: """ - touch ${sample}.strelka.vcf.gz - touch ${sample}.filtered.strelka.vcf.gz + touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi + touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi """ } +process somaticcombine { //Merge the per-caller VCFs of one tumor/normal pair into a single VCF with DISCVRSeq MergeVcfsAndGenotypes + label 'process_mid' + publishDir(path: "${outdir}/vcfs/combined", mode: 'copy') + + input: + tuple val(tumorsample), val(normal), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_combined.vcf.gz"), + path("${tumorsample}_combined.vcf.gz.tbi") + + script: //The jar path comes from the DISCVRSeq_JAR variable set in docker/logan_base/Dockerfile + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } //Pair each caller name with its VCF as "caller vcf" + vcfin2="-V:" + vcfin1.join(" -V:") //Tagged inputs of the form -V:caller1 vcf1 -V:caller2 vcf2 + println vcfin2 + + """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined.vcf.gz \ + $vcfin2 + """ + + stub: + + """ + touch ${tumorsample}_combined.vcf.gz + touch ${tumorsample}_combined.vcf.gz.tbi + """ + +} process annotvep_tn { publishDir(path: "${outdir}/mafs/", mode: 'copy') input: tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf),path(vcfindex) output: path("paired/${vc}/${tumorsample}.maf") @@ -739,18 +795,3 @@ process combinemafs_tn { } - -/* -process combineVariants_allcallers { - - publishDir(path: "${outdir}/vcfs/", mode: 'copy') - - input: - tuple val(sample), path(inputvcf), val(vc) - - output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") - -} -*/ \ No newline at end of file diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 33d5009..3da5360 100644 ---
a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -186,8 +186,9 @@ process mutect2filter_tonly { input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: - tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), - path("${sample}.tonly.mut2.norm.vcf.gz"), + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -217,13 +218,14 @@ process mutect2filter_tonly { awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' |\ bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz + bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.tonly.mut2.marked.vcf.gz - touch ${sample}.tonly.mut2.norm.vcf.gz + touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi + touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv """ } @@ -310,7 +312,7 @@ process octopus_tonly { output: tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.octopus.vcf.gz") + path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") script: @@ -318,25 +320,62 @@ process octopus_tonly { octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP -t ${bed} \ $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.octopus.vcf.gz --threads $task.cpus + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus """ stub: """ - touch ${tumorname}_${bed.simpleName}.octopus.vcf.gz + touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz """ } +process somaticcombine_tonly { + label 'process_mid' + publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') + + input: + tuple val(tumorsample), + 
val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: //The jar path comes from the DISCVRSeq_JAR variable set in docker/logan_base/Dockerfile + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } //Pair each caller name with its VCF as "caller vcf" + vcfin2="-V:" + vcfin1.join(" -V:") //Tagged inputs of the form -V:caller1 vcf1 -V:caller2 vcf2, tags must match the priority_list names below + println vcfin2 + + """ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,octopus_tonly,vardict_tonly,varscan_tonly \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} + process annotvep_tonly { publishDir("${outdir}/mafs", mode: "copy") input: tuple val(tumorsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf), + path(vcfindex) output: diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index b4b8646..59343cc 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -27,7 +27,8 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; - combinemafs_tn} from './variant_calling.nf' + annotvep_tn as annotvep_tn_combined; + combinemafs_tn; somaticcombine} from './variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; varscan_tonly; vardict_tonly; octopus_tonly; @@ -36,7 +37,8 @@ include {mutect2_t_tonly; mutect2filter_tonly; mergemut2stats_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; - combinemafs_tonly} from './variant_calling_tonly.nf' + annotvep_tonly as annotvep_tonly_combined; +
combinemafs_tonly;somaticcombine_tonly} from './variant_calling_tonly.nf' include {svaba_somatic; manta_somatic; survivor_sv; gunzip; @@ -199,8 +201,6 @@ workflow VC { .join(mergemut2stats.out) .join(learnreadorientationmodel.out) .join(contamination_paired.out) - mutect2filter(mut2tn_filter) - //Tumor Only Calling bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} @@ -235,87 +235,109 @@ workflow VC { .join(learnreadorientationmodel_tonly.out) .join(contamination_tumoronly.out) - mutect2filter_tonly(mut2tonly_filter) - mutect2filter.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,normal,"mutect2",finalvcf)} | annotvep_tn_mut2 - - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 + + //Annotation) + mutect2_in=mutect2filter(mut2tn_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + annotvep_tn_mut2(mutect2_in) - //Strelka + + mutect2_in_tonly=mutect2filter_tonly(mut2tonly_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} + annotvep_tonly_mut2(mutect2_in_tonly) + + //Strelka TN strelka_tn(bambyinterval) strelkaout=strelka_tn.out.groupTuple() .map { samplename,vcfs,indels -> tuple( samplename, vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() }, indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } )} - combineVariants_strelka(strelkaout) - combineVariants_strelka.out.join(sample_sheet) - .map{tumor,markedvcf,finalvcf,normal -> tuple(tumor,normal,"strelka",finalvcf)} | annotvep_tn_strelka + strelka_in=combineVariants_strelka(strelkaout) | join(sample_sheet) + | 
map{tumor,markedvcf,markedindex,finalvcf,finalindex,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + annotvep_tn_strelka(strelka_in) //Vardict vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict - vardict_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"vardict",normvcf)} | annotvep_tn_vardict + vardict_in=vardict_comb.join(sample_sheet) + .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + annotvep_tn_vardict(vardict_in) //VarDict_tonly vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> tuple(tumorname,tumorbam,tumorbai,bed)} - vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict - + vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly + + vardict_in_tonly=combineVariants_vardict_tonly.out.join(sample_sheet) + .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) + //VarScan TN - varscan_in=bambyinterval.join(contamination_paired.out) - varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan - varscan_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan - - //VarScan_TOnly - varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly - varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> 
tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + varscan_in=bambyinterval.join(contamination_paired.out) + | varscan_tn | groupTuple() |map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + annotvep_tn_varscan(varscan_in) - varscan_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan - + //VarScan_TOnly + varscan_in_tonly=bambyinterval.join(contamination_paired.out) + | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly + | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) + //Lofreq TN - lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} - | combineVariants_lofreq | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"lofreq",normvcf)} - | annotvep_tn_lofreq + lofreq_in=lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} + | combineVariants_lofreq | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} + annotvep_tn_lofreq(lofreq_in) //MuSE TN - muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} - | combineVariants_muse | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"muse",normvcf)} - | annotvep_tn_muse + muse_in=muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} + | combineVariants_muse | join(sample_sheet) + | 
map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} + annotvep_tn_muse(muse_in) //Octopus_TN - octopus_annotin=octopus_tn(bambyinterval) | bcftools_index_octopus - | groupTuple() |map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} - | combineVariants_octopus | join(sample_sheet)|map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} - annotvep_tn_octopus(octopus_annotin) + octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus + | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} + | combineVariants_octopus | map{samplename,marked,markedindex,normvcf,normindex -> + tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} + annotvep_tn_octopus(octopus_in) - //Octopus_TOnly - octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> + octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly - octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} - | combineVariants_octopus_tonly - - octopus_tonly_comb.join(sample_sheet) | - map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus + | groupTuple() + | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} + | combineVariants_octopus_tonly + | join(sample_sheet) | + map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) //Combine All Variants Using VCF and Then Reannotate - 
//annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn - //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly + mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) + | concat(vardict_in) |concat(varscan_in)|groupTuple(by:[0,1]) //Key on tumor and normal so normal stays a single value for somaticcombine + | somaticcombine + | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} + | annotvep_tn_combined + + mutect2_in_tonly|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly)|groupTuple() //Collect every caller of a tumor into lists as somaticcombine_tonly expects + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + //Implement PCGR Annotator/CivIC Next emit: - somaticcall_input=octopus_annotin - + somaticcall_input=octopus_in + } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 5143cbe..af05fe1 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -32,7 +32,8 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; mergemut2stats_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; - combinemafs_tonly} from './variant_calling_tonly.nf' + annotvep_tonly as annotvep_tonly_combined; + combinemafs_tonly; somaticcombine_tonly} from './variant_calling_tonly.nf' include {manta_tonly; svaba_tonly; survivor_sv; gunzip; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; @@ -167,36 +168,41 @@ workflow VC_TONLY { .join(learnreadorientationmodel_tonly.out) .join(contamination_tumoronly.out) - mutect2filter_tonly(mut2tonly_filter) - - //Annotate - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 +
mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} + annotvep_tonly_mut2(mutect2_tonly_in) - //VarDict_tonly - vardict_tonly(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict + //VarDict + vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} + | combineVariants_vardict_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) //VarScan_tonly - varscan_in=bambyinterval.join(contamination_tumoronly.out) - varscan_tonly_comb=varscan_tonly(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan_tonly - - varscan_tonly_comb.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan + varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out) + | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} + | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) //Octopus_tonly - octopus_tonly_comb=bambyinterval.map{tumor,bam,bai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus - octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex, "octopus_tonly")} | combineVariants_octopus - - octopus_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus - - - //Combine All Final - 
//annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly + octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus + | groupTuple() + | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} + ,vcfindex, "octopus_tonly")} + | combineVariants_octopus | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) + + + mutect2_tonly_in|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly)|groupTuple() //Collect every caller of a tumor into lists as somaticcombine_tonly expects + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined emit: