From cd48fd1d8d71348c09258f701ddd44c49f6cca4c Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 15 Nov 2023 16:42:32 -0500
Subject: [PATCH] feat: mergevcfsandgenotype and add docker

---
 docker/logan_base/Dockerfile              |  12 +-
 nextflow.config                           |   2 +-
 workflow/modules/variant_calling.nf       | 121 +++++++++++++-------
 workflow/modules/variant_calling_tonly.nf |  55 +++++++--
 workflow/modules/workflows.nf             | 132 +++++++++++++---------
 workflow/modules/workflows_tonly.nf       |  58 +++++-----
 6 files changed, 241 insertions(+), 139 deletions(-)

diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile
index 3ed4fb6..55832b5 100644
--- a/docker/logan_base/Dockerfile
+++ b/docker/logan_base/Dockerfile
@@ -52,15 +52,9 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4
     && /opt2/gatk-4.3.0.0/gatk --list
 ENV PATH="/opt2/gatk-4.3.0.0:$PATH"
 
-# Install last release of GATK3 (GATK/3.8-1)
-# Only being used for the CombineVariants
-# command that is not available in GATK4
-# Available via env variable: $GATK_JAR
-# Requires Java8 or 1.8
-RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
-    && tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
-    && rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
-ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar"
+# Use DISCVRSeq For CombineVariants Replacement
+RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar 
+ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar"
 
 # Install dependencies needed to add a new repository over HTTPS
 RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
diff --git a/nextflow.config b/nextflow.config
index 2f094e7..d868ce3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -229,7 +229,7 @@ profiles {
                 }
             withName: 'octopus_tn|octopus_tonly' {
                 container = 'docker://dancooke/octopus:latest'
-                memory=70.GB
+                memory=72.GB
                 time=24.h
                 cpus=16
             }
diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf
index d87c33c..8354cc8 100644
--- a/workflow/modules/variant_calling.nf
+++ b/workflow/modules/variant_calling.nf
@@ -224,17 +224,17 @@ process mutect2filter {
     publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
-        tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
+        tuple val(sample), path(mutvcfs), path(stats), path(obs), 
+        path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
     output:
-        tuple val(sample), path("${sample}.mut2.marked.vcf.gz"), 
-        path("${sample}.mut2.norm.vcf.gz"), 
+        tuple val(sample), 
+        path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"),
+        path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"), 
         path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
-    //Include the stats and  concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz
     mut2in = mutvcfs.join(" -I ")
 
-
     """
     gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz 
     gatk IndexFeatureFile -I ${sample}.concat.vcf.gz 
@@ -258,12 +258,13 @@ process mutect2filter {
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
         sed '/^\$/d' > ${sample}.mut2.norm.vcf |\
     bcftools view - -Oz -o  ${sample}.mut2.norm.vcf.gz
+    bcftools index -t ${sample}.mut2.norm.vcf.gz
     """
 
     stub:
     """
-    touch ${sample}.mut2.marked.vcf.gz
-    touch ${sample}.mut2.norm.vcf.gz
+    touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi
+    touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi
     touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv
     """
 
@@ -395,7 +396,7 @@ process octopus_tn {
     
 
     output:
-        tuple val(tumorname),
+        tuple val("${tumorname}_vs_${normalname}"), 
         path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")
     
     script:
@@ -506,8 +507,11 @@ process combineVariants {
     
     output:
         tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
-    
+        path("${vc}/${sample}.${vc}.marked.vcf.gz"), 
+        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), 
+        path("${vc}/${sample}.${vc}.norm.vcf.gz"),
+        path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")
+
     script:
     vcfin = inputvcf.join(" -I ")
     
@@ -518,13 +522,16 @@ process combineVariants {
         -D $GENOMEDICT \
         -I $vcfin
     bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
-    bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
+    bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
 
     bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
 
     mv ${sample}.${vc}.marked.vcf.gz ${vc}
+    
+    bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
+    bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
     """
 
     stub:
@@ -533,7 +540,8 @@ process combineVariants {
     mkdir ${vc}
     touch ${vc}/${sample}.${vc}.marked.vcf.gz
     touch ${vc}/${sample}.${vc}.norm.vcf.gz
-    
+    touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
+    touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
     """
 
 }
@@ -559,8 +567,7 @@ process bcftools_index_octopus {
 
     stub:
     """
-    touch ${vcf}
-    touch ${vcf}.tbi
+    touch ${vcf} ${vcf}.tbi
     """
 
 }
@@ -574,7 +581,10 @@ process combineVariants_octopus {
     
     output:
         tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
+        path("${vc}/${sample}.${vc}.marked.vcf.gz"), 
+        path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), 
+        path("${vc}/${sample}.${vc}.norm.vcf.gz"),
+        path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi")
     
     script:
     vcfin = vcfs.join(" ")
@@ -583,13 +593,16 @@ process combineVariants_octopus {
     mkdir ${vc}
     bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz
     bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
-    bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
+    bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
 
     bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
 
     mv ${sample}.${vc}.marked.vcf.gz ${vc}
+
+    bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
+    bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
     """
 
     stub:
@@ -598,16 +611,14 @@ process combineVariants_octopus {
     mkdir ${vc}
     touch ${vc}/${sample}.${vc}.marked.vcf.gz
     touch ${vc}/${sample}.${vc}.norm.vcf.gz
+    touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi
+    touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi
     
     """
 
 }
 
 
-
-
-
-
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
     label 'process_mid'
@@ -617,7 +628,9 @@ process combineVariants_strelka {
         tuple val(sample), path(strelkasnvs), path(strelkaindels)
     
     output:
-        tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz")
+        tuple val(sample), 
+        path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"),
+        path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi")
     
     
     script:
@@ -628,29 +641,72 @@ process combineVariants_strelka {
 
     """
     bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz
-    bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz 
+    bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
+        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
+        sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
+
+    bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz 
 
     bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz
 
+    bcftools index ${sample}.strelka.vcf.gz -t
+    bcftools index ${sample}.filtered.strelka.vcf.gz -t
     """
 
     stub:
 
     """
-    touch ${sample}.strelka.vcf.gz
-    touch ${sample}.filtered.strelka.vcf.gz
+    touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi
+    touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi
     
     """
 
 }
 
+process somaticcombine {
+    //Merge the per-caller VCFs of one tumor/normal pair into a single VCF (DISCVRSeq MergeVcfsAndGenotypes)
+    label 'process_mid'
+    publishDir(path: "${outdir}/vcfs/combined", mode: 'copy')
+
+    input: 
+        tuple val(tumorsample), val(normal),
+        val(callers),
+        path(vcfs), path(vcfindex)
+
+    output:
+        tuple val(tumorsample), val(normal),
+        path("${tumorsample}_combined.vcf.gz"),
+        path("${tumorsample}_combined.vcf.gz.tbi")
+
+    script:
+    //Pair callers[i] with vcfs[i] to build tagged "-V:<caller> <vcf>" arguments; vcfindex is staged but not referenced
+    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+    vcfin2="-V:" + vcfin1.join(" -V:")
+    //\$DISCVRSeq_JAR is escaped so the shell resolves it at run time (exported in docker/logan_base/Dockerfile)
+    """
+    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
+        -R $GENOMEREF \
+        --genotypeMergeOption PRIORITIZE \
+        --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
+        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
+        -O ${tumorsample}_combined.vcf.gz \
+        $vcfin2
+    """
+
+    stub:
+    """
+    touch ${tumorsample}_combined.vcf.gz
+    touch ${tumorsample}_combined.vcf.gz.tbi
+    """
+
+}
 
 process annotvep_tn {    
     publishDir(path: "${outdir}/mafs/", mode: 'copy')
 
     input:
         tuple val(tumorsample), val(normalsample), 
-        val(vc), path(tumorvcf) 
+        val(vc), path(tumorvcf),path(vcfindex) 
 
     output:
         path("paired/${vc}/${tumorsample}.maf")
@@ -739,18 +795,3 @@ process combinemafs_tn {
 }
 
 
-
-/*
-process combineVariants_allcallers {
-
-    publishDir(path: "${outdir}/vcfs/", mode: 'copy')
-
-    input:
-        tuple val(sample), path(inputvcf), val(vc)
-    
-    output:
-        tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
-
-}
-*/
\ No newline at end of file
diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf
index 33d5009..3da5360 100644
--- a/workflow/modules/variant_calling_tonly.nf
+++ b/workflow/modules/variant_calling_tonly.nf
@@ -186,8 +186,9 @@ process mutect2filter_tonly {
     input:
         tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination)
     output:
-        tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), 
-        path("${sample}.tonly.mut2.norm.vcf.gz"), 
+        tuple val(sample), 
+        path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), 
+        path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), 
         path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
@@ -217,13 +218,14 @@ process mutect2filter_tonly {
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\
         sed '/^\$/d' |\
     bcftools view - -Oz -o  ${sample}.tonly.mut2.norm.vcf.gz
+    bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz
 
     """
 
     stub:
     """
-    touch ${sample}.tonly.mut2.marked.vcf.gz
-    touch ${sample}.tonly.mut2.norm.vcf.gz
+    touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi
+    touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi
     touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv
     """
 }
@@ -310,7 +312,7 @@ process octopus_tonly {
     
     output:
         tuple val(tumorname),
-        path("${tumorname}_${bed.simpleName}.octopus.vcf.gz")
+        path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz")
     
     script:
 
@@ -318,25 +320,62 @@ process octopus_tonly {
     octopus -R $GENOMEREF -C cancer -I ${tumor} \
     --annotations AC AD DP -t ${bed} \
     $SOMATIC_FOREST \
-    -o ${tumorname}_${bed.simpleName}.octopus.vcf.gz --threads $task.cpus
+    -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus
 
     """
 
     stub:
     
     """
-    touch ${tumorname}_${bed.simpleName}.octopus.vcf.gz
+    touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz
 
     """
 }
 
 
+process somaticcombine_tonly {
+    //Merge the per-caller tumor-only VCFs of one sample into a single VCF (DISCVRSeq MergeVcfsAndGenotypes)
+    label 'process_mid'
+    publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')
+
+    input: 
+        tuple val(tumorsample), 
+        val(callers),
+        path(vcfs), path(vcfindex)
+
+    output:
+        tuple val(tumorsample),
+        path("${tumorsample}_combined_tonly.vcf.gz"),
+        path("${tumorsample}_combined_tonly.vcf.gz.tbi")
+
+    script:
+    //Pair callers[i] with vcfs[i] to build tagged "-V:<caller> <vcf>" arguments; vcfindex is staged but not referenced
+    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+    vcfin2="-V:" + vcfin1.join(" -V:")
+    //NOTE(review): priority names are assumed to have to match the -V tags sent by the workflows - confirm
+    """
+    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
+        -R $GENOMEREF \
+        --genotypeMergeOption PRIORITIZE \
+        --priority_list mutect2,octopus_tonly,vardict_tonly,varscan_tonly \
+        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
+        -O ${tumorsample}_combined_tonly.vcf.gz \
+        $vcfin2
+    """
+
+    stub:
+    """
+    touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi
+    """
+}
+
 process annotvep_tonly {
     publishDir("${outdir}/mafs", mode: "copy")
 
     input:
         tuple val(tumorsample), 
-        val(vc), path(tumorvcf) 
+        val(vc), path(tumorvcf), 
+        path(vcfindex)
 
 
     output:
diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf
index b4b8646..59343cc 100644
--- a/workflow/modules/workflows.nf
+++ b/workflow/modules/workflows.nf
@@ -27,7 +27,8 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; 
     annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus;
     annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse;
-    combinemafs_tn} from './variant_calling.nf'
+    annotvep_tn as annotvep_tn_combined;
+    combinemafs_tn; somaticcombine} from './variant_calling.nf'
 
 include {mutect2_t_tonly; mutect2filter_tonly; 
     varscan_tonly; vardict_tonly; octopus_tonly;
@@ -36,7 +37,8 @@ include {mutect2_t_tonly; mutect2filter_tonly;
     mergemut2stats_tonly;
     annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; 
     annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus;
-    combinemafs_tonly} from './variant_calling_tonly.nf'
+    annotvep_tonly as annotvep_tonly_combined;
+    combinemafs_tonly;somaticcombine_tonly} from './variant_calling_tonly.nf'
 
 include {svaba_somatic; manta_somatic; 
     survivor_sv; gunzip;
@@ -199,8 +201,6 @@ workflow VC {
     .join(mergemut2stats.out)
     .join(learnreadorientationmodel.out)
     .join(contamination_paired.out)
-    mutect2filter(mut2tn_filter)
-
 
     //Tumor Only Calling
     bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)}
@@ -235,87 +235,109 @@ workflow VC {
     .join(learnreadorientationmodel_tonly.out)
     .join(contamination_tumoronly.out)
 
-    mutect2filter_tonly(mut2tonly_filter)
-    mutect2filter.out
-    .join(sample_sheet)
-    .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,normal,"mutect2",finalvcf)} | annotvep_tn_mut2
-
-    mutect2filter_tonly.out
-    .join(sample_sheet)
-    .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2
+    
+    //Annotation
+    mutect2_in=mutect2filter(mut2tn_filter)
+    | join(sample_sheet)
+    | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)}  
+    annotvep_tn_mut2(mutect2_in)
 
-    //Strelka
+    
+    mutect2_in_tonly=mutect2filter_tonly(mut2tonly_filter)
+    | join(sample_sheet)
+    | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} 
+    annotvep_tonly_mut2(mutect2_in_tonly)
+    
+    //Strelka TN 
     strelka_tn(bambyinterval)
     strelkaout=strelka_tn.out.groupTuple()
     .map { samplename,vcfs,indels -> tuple( samplename,
     vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },
     indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() }  
     )}
-    combineVariants_strelka(strelkaout)
-    combineVariants_strelka.out.join(sample_sheet)
-    .map{tumor,markedvcf,finalvcf,normal -> tuple(tumor,normal,"strelka",finalvcf)} | annotvep_tn_strelka
+    strelka_in=combineVariants_strelka(strelkaout) | join(sample_sheet)
+    | map{tumor,markedvcf,markedindex,finalvcf,finalindex,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} 
+    annotvep_tn_strelka(strelka_in)
 
     //Vardict
     vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict
-    vardict_comb.join(sample_sheet)
-     .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"vardict",normvcf)} | annotvep_tn_vardict
+    vardict_in=vardict_comb.join(sample_sheet)
+     .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)}  
+    annotvep_tn_vardict(vardict_in)
 
     //VarDict_tonly
     vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
         tuple(tumorname,tumorbam,tumorbai,bed)} 
-    vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly
-    combineVariants_vardict_tonly.out.join(sample_sheet)
-    .map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict
-
+    vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly
+    
+    vardict_in_tonly=combineVariants_vardict_tonly.out.join(sample_sheet)
+    .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} 
+    annotvep_tonly_vardict(vardict_in_tonly)
+    
     //VarScan TN
-    varscan_in=bambyinterval.join(contamination_paired.out)
-    varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan
-    varscan_comb.join(sample_sheet)
-    .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan
-
-    //VarScan_TOnly
-    varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
-    tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly 
-    varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly
+    varscan_in=bambyinterval.join(contamination_paired.out) 
+    | varscan_tn | groupTuple() |map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan
+    | join(sample_sheet)
+    | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} 
+    annotvep_tn_varscan(varscan_in)
     
-    varscan_tonly_comb1.join(sample_sheet)
-    .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan
-
+    //VarScan_TOnly
+    varscan_in_tonly=bambyinterval.join(contamination_paired.out)
+    | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
+            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  
+    | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly
+    | join(sample_sheet)
+    | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 
+    annotvep_tonly_varscan(varscan_in_tonly)
+        
     //Lofreq TN
-    lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} 
-        | combineVariants_lofreq | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"lofreq",normvcf)} 
-        | annotvep_tn_lofreq
+    lofreq_in=lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} 
+        | combineVariants_lofreq | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} 
+    annotvep_tn_lofreq(lofreq_in)
 
     //MuSE TN
-    muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} 
-        | combineVariants_muse | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"muse",normvcf)} 
-        | annotvep_tn_muse
+    muse_in=muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} 
+        | combineVariants_muse | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} 
+    annotvep_tn_muse(muse_in)
 
     //Octopus_TN
-    octopus_annotin=octopus_tn(bambyinterval) | bcftools_index_octopus
-        | groupTuple()  |map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} 
-        | combineVariants_octopus | join(sample_sheet)|map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} 
-    annotvep_tn_octopus(octopus_annotin)
+    octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus 
+        | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")}
+        | combineVariants_octopus | map{samplename,marked,markedindex,normvcf,normindex -> 
+            tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)}
+    annotvep_tn_octopus(octopus_in) 
 
-    
     //Octopus_TOnly
-    octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
+    octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
     tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly
-    octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} 
-        | combineVariants_octopus_tonly
-    
-    octopus_tonly_comb.join(sample_sheet) |
-        map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus
+    | groupTuple() 
+        | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")}
+        | combineVariants_octopus_tonly 
+        | join(sample_sheet) |
+        map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+    annotvep_tonly_octopus(octopus_in_tonly)
 
     //Combine All Variants Using VCF and Then Reannotate
-    //annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn
-    //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly
+    mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in)
+        | concat(vardict_in) |concat(varscan_in)|groupTuple(by:[0,1])
+        | somaticcombine 
+        | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} 
+        | annotvep_tn_combined //grouped on (tumor,normal) so normal stays one value while callers/vcfs/indexes become lists
+
+    mutect2_in_tonly|concat(octopus_in_tonly)
+        | concat(vardict_in_tonly)|concat(varscan_in_tonly)|groupTuple()
+        | somaticcombine_tonly 
+        | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} 
+        | annotvep_tonly_combined //grouped per tumor so somaticcombine_tonly gets lists of callers/vcfs/indexes
+    
 
     //Implement PCGR Annotator/CivIC Next
 
     emit:
-        somaticcall_input=octopus_annotin
-
+        somaticcall_input=octopus_in
+    
 }
 
 
diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf
index 5143cbe..af05fe1 100644
--- a/workflow/modules/workflows_tonly.nf
+++ b/workflow/modules/workflows_tonly.nf
@@ -32,7 +32,8 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly;
     mergemut2stats_tonly;
     annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; 
     annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus;
-    combinemafs_tonly} from './variant_calling_tonly.nf'
+    annotvep_tonly as annotvep_tonly_combined;
+    combinemafs_tonly; somaticcombine_tonly} from './variant_calling_tonly.nf'
 
 include {manta_tonly; svaba_tonly; survivor_sv; gunzip;
 annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly;
@@ -167,36 +168,41 @@ workflow VC_TONLY {
     .join(learnreadorientationmodel_tonly.out)
     .join(contamination_tumoronly.out)
 
-    mutect2filter_tonly(mut2tonly_filter)
-    
-    //Annotate
-    mutect2filter_tonly.out
-    .join(sample_sheet)
-    .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2
+    mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) 
+    | join(sample_sheet)
+    | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} 
+    annotvep_tonly_mut2(mutect2_tonly_in)
 
-    //VarDict_tonly
-    vardict_tonly(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly
-    combineVariants_vardict_tonly.out.join(sample_sheet)
-    .map{tumor,marked,normvcf ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict
+    //VarDict
+    vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} 
+    | combineVariants_vardict_tonly
+    | join(sample_sheet)
+    | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)}
+    annotvep_tonly_vardict(vardict_in_tonly)
 
     //VarScan_tonly
-    varscan_in=bambyinterval.join(contamination_tumoronly.out)
-    varscan_tonly_comb=varscan_tonly(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan_tonly
-    
-    varscan_tonly_comb.join(sample_sheet)
-    .map{tumor,marked,normvcf ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan
+    varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out)
+        | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} 
+        | combineVariants_varscan_tonly 
+        | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 
+    annotvep_tonly_varscan(varscan_in_tonly)
 
     //Octopus_tonly
-    octopus_tonly_comb=bambyinterval.map{tumor,bam,bai,bed->
-    tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus
-    octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex, "octopus_tonly")} | combineVariants_octopus
-    
-    octopus_tonly_comb1.join(sample_sheet)
-    .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus
-
-
-    //Combine All Final
-    //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly
+    octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus
+    | groupTuple()
+    | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name}
+            ,vcfindex, "octopus_tonly")} 
+    | combineVariants_octopus | join(sample_sheet)
+    | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+    annotvep_tonly_octopus(octopus_in_tonly)
+
+
+    mutect2_tonly_in|concat(octopus_in_tonly)
+        | concat(vardict_in_tonly)|concat(varscan_in_tonly)|groupTuple()
+        | somaticcombine_tonly 
+        | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} 
+        | annotvep_tonly_combined //grouped per tumor so somaticcombine_tonly gets lists of callers/vcfs/indexes
 
 
     emit: