diff --git a/VERSION b/VERSION
index 1efec57..a918a2a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.0-alpha
+0.6.0
diff --git a/config/cluster.json b/config/cluster.json
index ff1902a..4190fbf 100644
--- a/config/cluster.json
+++ b/config/cluster.json
@@ -170,7 +170,8 @@
     "glnexus": {
         "threads": "24",
         "mem": "240G",
-        "time": "4-00:00:00"
+        "time": "4-00:00:00",
+        "gres": "lscratch:256"
     },
     "gtype_refinement": {
         "threads": "2",
@@ -189,9 +190,9 @@
         "mem": "96G"
     },
     "hmftools_amber": {
-        "mem": "96G",
+        "mem": "365G",
         "time" : "48:00:00",
-        "threads": "24"
+        "threads": "56"
     },
     "hmftools_cobalt": {
         "mem": "96G",
@@ -203,6 +204,22 @@
         "time" : "48:00:00",
         "threads": "8"
     },
+    "somatic_purple_maf": {
+        "threads": "4",
+        "mem": "32G",
+        "time": "1-12:00:00",
+        "gres": "lscratch:32"
+    },
+    "purple_cohort_maf": {
+        "threads": "2",
+        "mem": "8G",
+        "time": "4:00:00"
+    },
+    "purple_cohort_maftools": {
+        "threads": "2",
+        "mem": "8G",
+        "time": "4:00:00"
+    },
     "inbreeding": {
         "threads": "8",
         "partition": "norm"
@@ -221,6 +238,11 @@
         "mem": "96G",
         "time": "1-00:00:00"
     },
+    "manta_somatic_filter": {
+        "threads": "4",
+        "mem": "72G",
+        "time": "1-00:00:00"
+    },
     "opencravat_germline_merge": {
         "threads": "4",
         "mem": "120G",
@@ -256,7 +278,7 @@
     "opencravat_germline": {
         "threads": "20",
         "mem": "48G",
-        "time": "12:00:00"
+        "time": "1-12:00:00"
     },
     "opencravat_somatic": {
         "threads": "32",
@@ -270,14 +292,19 @@
     },
     "octopus_somatic": {
         "threads": "28",
-        "mem": "48G",
-        "time": "1-18:00:00"
+        "mem": "120G",
+        "time": "2-12:00:00"
     },
     "octopus_merge": {
         "threads": "4",
         "mem": "48G",
         "time": "12:00:00"
     },
+    "octopus_filter": {
+        "threads": "4",
+        "mem": "48G",
+        "time": "12:00:00"
+    },
     "ped": {
         "threads": "2",
         "partition": "norm",
@@ -363,6 +390,11 @@
         "mem": "64G",
         "time": "24:00:00"
     },
+    "strelka_format": {
+        "threads": "16",
+        "mem": "64G",
+        "time": "24:00:00"
+    },
     "vcftools": {
         "threads": "2",
         "time": "12:00:00"
diff --git a/config/containers.json b/config/containers.json
index ff0340d..40e63cf 100644
--- a/config/containers.json
+++ b/config/containers.json
@@ -4,7 +4,7 @@
         "genome-seek_trim_map": "docker://skchronicles/genome-seek_trim_map:v0.1.0",
         "genome-seek_somatic":  "docker://skchronicles/genome-seek_somatic:v0.1.0",
         "genome-seek_qc":       "docker://skchronicles/genome-seek_qc:v0.1.0",
-        "genome-seek_sv":       "docker://skchronicles/genome-seek_sv:v0.1.0",
+        "genome-seek_sv":       "docker://skchronicles/genome-seek_sv:v0.2.0",
         "genome-seek_cnv":      "docker://skchronicles/genome-seek_cnv:v0.2.0",
         "genome-seek_hla":      "docker://skchronicles/genome-seek_hla:v0.1.0",
         "base":                 "docker://skchronicles/ccbr_wes_base:v0.1.0",
diff --git a/config/genome.json b/config/genome.json
index 576d004..47d7a14 100644
--- a/config/genome.json
+++ b/config/genome.json
@@ -56,6 +56,9 @@
         ],
         "GENOME": "/data/OpenOmics/references/genome-seek/Homo_sapiens_assembly38.fasta",
         "GNOMAD": "/data/OpenOmics/references/genome-seek/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz",
+        "SLIVARGNOMAD": "/data/OpenOmics/references/genome-seek/slivar/gnomad.hg38.v2.zip",
+        "SLIVARGNOMAD3": "/data/OpenOmics/references/genome-seek/slivar/gnomad.hg38.genomes.v3.fix.zip",
+        "SLIVARTOPMED": "/data/OpenOmics/references/genome-seek/slivar/topmed.hg38.dbsnp.151.zip",
         "GISTIC_REFGENE": "/data/OpenOmics/references/genome-seek/gistic/hg38.UCSC.add_miR.160920.refgene.mat",
         "HLA_LA_GRAPH": "/data/OpenOmics/references/genome-seek/HLA-LA/graphs/PRG_MHC_GRCh38_withIMGT",
         "HMFTOOLS_REF_VERSION": "38",
@@ -88,6 +91,7 @@
             "recal5": ["chr11", "chr12", "chr17", "chr19", "chr22", "chrX"]
         },
         "MANTA_CALLREGIONS": "/data/OpenOmics/references/genome-seek/Manta/Homo_sapiens_assembly38_main_chromosomes.bed.gz",
+        "MANTA_FILTER_CHROMOSEQ_TRANSLOCATION": "/data/OpenOmics/references/genome-seek/chromoseq/chromoseq_translocations.bedpe",
         "OC_LIFTOVER": "hg38",
         "OC_MODULES": "/data/OpenOmics/references/genome-seek/OpenCRAVAT/modules",
         "OCTOPUS_SOMATIC_FOREST_MODEL": "/data/OpenOmics/references/genome-seek/Octopus/somatic.v0.7.4.forest",
diff --git a/config/modules.json b/config/modules.json
index 94a57f0..aa35c7e 100644
--- a/config/modules.json
+++ b/config/modules.json
@@ -5,6 +5,7 @@
         "fastp": "fastp/0.23.1",
         "bowtie": "bowtie/2-2.4.5",
         "bcftools": "bcftools/1.13",
+        "bedtools": "bedtools/2.27.1",
         "canvas": "Canvas/1.40",
         "circos": "circos/0.69-9",
         "deepvariant": "deepvariant/1.4.0",
@@ -20,6 +21,7 @@
         "rlang": "R/4.2.0",
         "samblaster": "samblaster/0.1.25",
         "strelka": "strelka/2.9.10",
+        "svtools": "svtools/0.5.1",
         "python2": "python/2.7",
         "python3": "python/3.8",
         "fastq_screen": "fastq_screen/0.14.1",
@@ -28,6 +30,7 @@
         "gatk3": "GATK/3.8-1",
         "gatk4": "GATK/4.2.0.0",
         "multiqc": "multiqc/1.11",
+        "minimap2": "minimap2/2.26",
         "vcftools": "vcftools/0.1.16"
     }
 }
diff --git a/workflow/Snakefile b/workflow/Snakefile
index f272ebb..0f0ccd6 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -309,6 +309,10 @@ rule all:
             join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.vcf.gz"),
             name=provided(provided(tumors, call_somatic), call_sv),
         ),
+        expand( 
+            join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.filtered.vcf"),
+            name=provided(provided(tumors, call_somatic), call_sv),
+        ),
         # Mutect2, call somatic variants,
         # scattered and merged on chroms
         # to speed up overall run time,
@@ -371,44 +375,19 @@ rule all:
             caller=provided(tn_somatic_callers, call_somatic),
             name=provided(tumorWnormal, call_somatic),
         ),
-        # Post-process somatic VCF, splits sites
-        # found in the tumor and normal sample, 
-        # and to merge callsets across callers,
+        # Post-process somatic VCF, 
+        # and merge callsets across callers,
         # only runs if provided --call-somatic 
         # @imported from rules/somatic.smk
         expand( 
-            join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.tumor.vcf.gz"),
-            caller=provided(somatic_callers, call_somatic),
-            name=provided(tumors, call_somatic),
-        ),
-        # Conditionally add tumor-normal callers,
-        # MuSE and Strelka
-        expand( 
-            join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.tumor.vcf.gz"),
-            caller=provided(tn_somatic_callers, call_somatic),
-            name=provided(tumorWnormal, call_somatic),
-        ),
-        expand( 
-            join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.normal.vcf.gz"),
-            caller=provided(somatic_callers, call_somatic),
-            name=provided(tumorWnormal, call_somatic),
-        ),
-        # Conditionally add tumor-normal callers,
-        # MuSE and Strelka
-        expand( 
-            join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.normal.vcf.gz"),
-            caller=provided(tn_somatic_callers, call_somatic),
-            name=provided(tumorWnormal, call_somatic),
-        ),
-        expand( 
-            join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.vcf.gz"),
+            join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.vcf.gz"),
             name=provided(tumors, call_somatic),
         ),
         # vcf2maf, create per-sample MAF file,
         # only runs if provided --call-somatic 
         # @imported from rules/somatic.smk
         expand( 
-            join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.maf"),
+            join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.maf"),
             name=provided(tumors, call_somatic),
         ),
         # vcf2maf, create cohort-level MAF file,
@@ -443,16 +422,29 @@ rule all:
         # @imported from rules/cnv.smk
         expand( 
             join(workpath, "hmftools", "amber", "{name}", "{name}.amber.baf.tsv"),
-            name=provided(provided(tumors, call_somatic), call_cnv),
+            name=provided(provided(tumors, call_somatic), call_cnv)
         ),
         expand( 
             join(workpath, "hmftools", "cobalt", "{name}", "{name}.cobalt.ratio.tsv"),
-            name=provided(provided(tumors, call_somatic), call_cnv),
+            name=provided(provided(tumors, call_somatic), call_cnv)
         ),
         expand( 
-            join(workpath, "hmftools", "purple", "{name}", "{caller}", "{name}.purple.cnv.somatic.tsv"),
-            name=provided(provided(tumors, call_somatic), call_cnv),
-            caller=provided(purple_callers, call_somatic),
+            join(workpath, "hmftools", "purple", "{name}", "{name}.purple.cnv.somatic.tsv"),
+            name=provided(provided(tumors, call_somatic), call_cnv)
+        ),
+        expand( 
+            join(workpath, "hmftools", "purple", "{name}", "{name}.purple.maf"),
+            name=provided(provided(tumors, call_somatic), call_cnv)
+        ),
+        # Maftools, create cohort-level summary plots from purple somatic VCF,
+        # only runs if provided --call-somatic AND --call-cnv
+        # @imported from rules/somatic.smk    
+        provided(
+            provided(
+                [join(workpath, "hmftools", "cohort_oncoplot.pdf")],
+                call_somatic
+            ),
+            call_cnv
         ),
         # OpenCRAVAT, somatic annotation, rank and score variants,
         # only runs if provided --open-cravat AND --call-somatic
diff --git a/workflow/rules/cnv.smk b/workflow/rules/cnv.smk
index fda5082..7490635 100644
--- a/workflow/rules/cnv.smk
+++ b/workflow/rules/cnv.smk
@@ -24,11 +24,12 @@ def get_manta_calls(wildcards):
     tumor = wildcards.name
     if call_sv:
         # Runs in a tumor, normal mode
-        return join(workpath, "MANTA", "somatic", tumor, "results", "variants", "somaticSV.vcf.gz"),
+        return join(workpath, "MANTA", "somatic", tumor, "results", "variants", "somaticSV.filtered.vcf"),
     else:
         # Runs in tumor-only mode
         return []
 
+
 # Germline Copy Number Variation
 rule peddy:
     """
@@ -207,7 +208,6 @@ rule hmftools_amber:
         tumor  = join(workpath, "BAM", "{name}.recal.bam"),
         normal = get_normal_recal_bam,
     output:
-        con = join(workpath, "hmftools", "amber", "{name}", "{name}.amber.contamination.vcf.gz"),
         baf = join(workpath, "hmftools", "amber", "{name}", "{name}.amber.baf.tsv"),
     params:
         rname     = 'hmfamber',
@@ -240,7 +240,6 @@ rule hmftools_amber:
     """
 
 
-
 rule hmftools_cobalt:
     """Data-processing step to determines the read depth ratios for
     Purple's copy number fitting. HMF Tools is a suite of tools the
@@ -307,17 +306,22 @@ rule hmftools_purple:
     input:
         amber  = join(workpath, "hmftools", "amber", "{name}", "{name}.amber.baf.tsv"),
         cobalt = join(workpath, "hmftools", "cobalt", "{name}", "{name}.cobalt.ratio.tsv"),
-        vcf    = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.vcf"),
+        vcf    = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.temp.vep.vcf"),
         sv     = get_manta_calls,
     output:
-        purity = join(workpath, "hmftools", "purple", "{name}", "{caller}", "{name}.purple.purity.tsv"),
-        cnv    = join(workpath, "hmftools", "purple", "{name}", "{caller}", "{name}.purple.cnv.somatic.tsv"),
-        driver = join(workpath, "hmftools", "purple", "{name}", "{caller}", "{name}.driver.catalog.somatic.tsv"),
+        purity = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.purity.tsv"),
+        cnv    = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.cnv.somatic.tsv"),
+        driver = join(workpath, "hmftools", "purple", "{name}", "{name}.driver.catalog.somatic.tsv"),
+        vcf    = join(workpath, "hmftools", "purple", "{name}", "{name}.merged.filtered.norm.biallelic.vcf"),
+        purplevcf    = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.somatic.vcf.gz"),
     params:
         rname      = 'hmfpurple',
         tumor      = '{name}',
-        outdir     = join(workpath, "hmftools", "purple", "{name}", "{caller}"),
+        outdir     = join(workpath, "hmftools", "purple", "{name}"),
         genome     = config['references']['GENOME'],
+        gnomadv2   = config['references']['SLIVARGNOMAD'],
+        gnomadv3   = config['references']['SLIVARGNOMAD3'],
+        topmed     = config['references']['SLIVARTOPMED'],
         ref_ver    = config['references']['HMFTOOLS_REF_VERSION'],
         purple_jar = config['references']['HMFTOOLS_PURPLE_JAR'],
         gc_profile = config['references']['HMFTOOLS_GC_PROFILE'],
@@ -333,7 +337,7 @@ rule hmftools_purple:
         tumor_flag = lambda w: "" if tumor2normal[w.name] else "-tumor_only",
         # Building optional flag for SV calls
         sv_option = lambda w: "-structural_vcf {0}".format(
-            join(workpath, "MANTA", "somatic", w.name, "results", "variants", "somaticSV.vcf.gz"),
+            join(workpath, "MANTA", "somatic", w.name, "results", "variants", "somaticSV.filtered.vcf"),
         ) if call_sv else "",
     threads: 
         int(allocated("threads", "hmftools_purple", cluster)),
@@ -341,6 +345,7 @@ rule hmftools_purple:
     envmodules:
         config['tools']['rlang'],
         config['tools']['circos'],
+        config['tools']['bcftools'],
     shell: """
     # Set output directories
     # for Amber and Cobalt
@@ -348,7 +353,28 @@ rule hmftools_purple:
     cobalt_outdir="$(dirname "{input.cobalt}")"
     echo "Amber output directory: $amber_outdir"
     echo "Cobalt output directory: $cobalt_outdir"
-
+    
+    # Decompress and filter merged 
+    # somatic variant VCF to remove 
+    # biallelic variants and variants 
+    # with > 0.01 frequency in the 
+    # general population
+    bcftools view \\
+        -O v \\
+        --max-alleles 2 \\
+        {input.vcf} \\
+        | slivar expr \\
+            --vcf - --pass-only \\
+            -g {params.gnomadv2} \\
+            -g {params.topmed} \\
+            --info 'INFO.impactful && INFO.gnomad_popmax_af < 0.01 && INFO.topmed_af < 0.05' \\
+        | sed 's/gnomad_/gnomadV2_/g' \\
+        | slivar expr \\
+            --vcf - --pass-only \\
+            -g {params.gnomadv3} \\
+            --info 'INFO.gnomad_popmax_af < 0.01' \\
+    > {output.vcf}
+    
     # Run Purple to find CNVs,
     # purity and ploidy, and 
     # cancer driver events
@@ -366,5 +392,119 @@ rule hmftools_purple:
         -somatic_hotspots {params.somatic_hotspot} \\
         -germline_hotspots {params.germline_hotspot} \\
         -threads {threads} {params.tumor_flag} \\
-        -somatic_vcf {input.vcf} {params.sv_option}
+        -somatic_vcf {output.vcf} {params.sv_option}
+    """
+
+
+rule somatic_purple_maf:
+    """Data-processing step to convert the merged somatic calls from 
+    each caller into a MAF file. This step takes filtered, norm, tumor 
+    callset from all callers and annotates the variants with VEP/106
+    and converts the resulting VCF file into MAF file format. vcf2maf 
+    requires the input vcf file is NOT compressed.
+    @Input:
+        Somatic variants found in the tumor sample (scatter-per-sample)
+    @Output:
+        Annotated, merged MAF file contaning somatic callsets
+    """
+    input:
+        vcf    = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.somatic.vcf.gz"),
+    output:
+        vcf = temp(join(workpath, "hmftools", "purple", "{name}", "{name}.purple.somatic.vcf")),
+        vep = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.somatic.vep.vcf"),
+        maf = join(workpath, "hmftools", "purple", "{name}", "{name}.purple.maf"),
+    params:
+        rname  = 'purplemaf',
+        tumor  = '{name}',
+        memory = allocated("mem", "somatic_purple_maf", cluster).lower().rstrip('g'),
+        vep_data    = config['references']['VEP_DATA'],
+        vep_build   = config['references']['VEP_BUILD'],
+        vep_species = config['references']['VEP_SPECIES'],
+        ref_version = config['references']['VEP_REF_VERSION'],
+        genome      = config['references']['GENOME'],
+        # Building optional argument for paired normal
+        normal_option = lambda w: "--normal-id {0}".format(
+            tumor2normal[w.name]
+        ) if tumor2normal[w.name] else "",
+    threads: 
+        int(allocated("threads", "somatic_purple_maf", cluster))
+    container: config['images']['vcf2maf']
+    shell: """
+    # vcf2maf needs an uncompressed VCF file
+    zcat {input.vcf} \\
+    > {output.vcf}
+    # Run VEP and convert VCF into MAF file
+    vcf2maf.pl \\
+        --input-vcf {output.vcf} \\
+        --output-maf {output.maf} \\
+        --vep-path ${{VEP_HOME}} \\
+        --vep-data {params.vep_data} \\
+        --cache-version {params.ref_version} \\
+        --ref-fasta {params.genome} \\
+        --vep-forks {threads} \\
+        --tumor-id {params.tumor} {params.normal_option} \\
+        --ncbi-build {params.vep_build} \\
+        --species {params.vep_species} \\
+        --retain-info set,PURPLE_CN,SUBCL,HOTSPOT,IMPACT,NEAR_HOTSPOT,PNOISE,PNOISE2,PON,PURPLE_AF,PURPLE_GERMLINE,PURPLE_MACN,PURPLE_VCN,REPORTED,SOMATIC,SomaticEVS
+    """
+
+
+rule purple_cohort_maf:
+    """Data-processing step to merge the per-sample MAF files into a 
+    single MAF file for all samples, a cohort MAF file. 
+    @Input:
+        Somatic tumor MAF files (gather-per-sample)
+    @Output:
+        Merged somatic tumor MAF, cohort-level, with all call sets
+    """
+    input: 
+        mafs = expand(join(workpath, "hmftools", "purple", "{name}", "{name}.purple.maf"), name=tumors),
+    output: 
+        maf  = join(workpath, "hmftools", "cohort_somatic_variants.maf"),
+    params:
+        rname  = 'cohortmaf',
+        memory = allocated("mem", "purple_cohort_maf", cluster).lower().rstrip('g'),
+    threads: 
+        int(allocated("threads", "purple_cohort_maf", cluster))
+    container: config['images']['vcf2maf']
+    shell: """
+    echo "Combining MAFs..."
+    head -2 {input.mafs[0]} > {output.maf}
+    awk 'FNR>2 {{print}}' {input.mafs} >> {output.maf}
+    """
+
+
+rule purple_cohort_maftools:
+    """Data-processing step to run maftools on merged cohort-level 
+    MAF file, produces summarized plots like an oncoplot. 
+    @Input:
+        Cohort-level somatic MAF file (indirect-gather-due-to-aggregation) 
+    @Output:
+        TCGA comparsion plot
+        Top 20 genes by Vaf plot
+        MAF Summary plot
+        Oncoplot
+    """
+    input: 
+        maf  = join(workpath, "hmftools", "cohort_somatic_variants.maf"),
+    output:
+        tcga     = join(workpath, "hmftools", "cohort_tcga_comparison.pdf"),
+        gvaf     = join(workpath, "hmftools", "cohort_genes_by_VAF.pdf"),
+        summary  = join(workpath, "hmftools", "cohort_maf_summary.pdf"),
+        oncoplot = join(workpath, "hmftools", "cohort_oncoplot.pdf"),
+    params:
+        rname  = 'maftools',
+        wdir   = join(workpath, "hmftools"),
+        memory = allocated("mem", "purple_cohort_maftools", cluster).lower().rstrip('g'),
+        script = join("workflow", "scripts", "maftools.R"),
+    threads: 
+        int(allocated("threads", "purple_cohort_maftools", cluster)),
+    container: config['images']['genome-seek_somatic']
+    envmodules: config['tools']['rlang']
+    shell: """
+    Rscript {params.script} \\
+        {params.wdir} \\
+        {input.maf} \\
+        {output.summary} \\
+        {output.oncoplot}
     """
diff --git a/workflow/rules/open_cravat.smk b/workflow/rules/open_cravat.smk
index cbaa215..b52a5bf 100644
--- a/workflow/rules/open_cravat.smk
+++ b/workflow/rules/open_cravat.smk
@@ -370,7 +370,7 @@ rule opencravat_somatic:
         Per sample caller merged somatic SQLite OpenCravat results
     """
     input: 
-        vcfs = expand(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.vcf.gz"), name=tumors),
+        vcfs = expand(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.vcf.gz"), name=tumors),
     output: 
         db = join(workpath, "OpenCRAVAT", "somatic", "cravat_somatic.sqlite"),
     params: 
diff --git a/workflow/rules/somatic.smk b/workflow/rules/somatic.smk
index aac0c82..52d3410 100644
--- a/workflow/rules/somatic.smk
+++ b/workflow/rules/somatic.smk
@@ -35,28 +35,24 @@ def get_normal_pileup_table(wildcards):
         # Runs in tumor-only mode
         return []
 
-def get_somatic_merge_tumor(wildcards):
-    """Returns somatic variants found in the tumor sample 
-    for all somatic callers. For tumor-normal samples, extra
-    somatic callers (i.e. MuSE and Strelka) are run. Tumor-only 
-    samples will only have callsets from Mutect2 and octopus.
+def get_somatic_tn_callers(wildcards):
+    """Returns somatic variants found with tumor-normal variant
+    callers. For tumor-normal samples, extra somatic callers 
+    (i.e. MuSE and Strelka) are run. Tumor-only return empty
+    list (rule already has reference in input section).
     See config['pairs'] for tumor, normal pairs.
     """
-    callset = somatic_callers
     tumor = wildcards.name
     normal = tumor2normal[tumor]
     if normal:
-        # Callers = Octopus, Mutect2, MuSE, Strelka
+        # Callers = MuSE, Strelka
         return [
-            join(workpath, caller, "somatic", "{0}.{1}.filtered.norm.tumor.vcf.gz".format(tumor, caller)) \
-            for caller in somatic_callers + tn_somatic_callers
+            join(workpath, caller, "somatic", "{0}.{1}.filtered.norm.vcf".format(tumor, caller)) \
+            for caller in tn_somatic_callers
         ]
     else:
-        # Callers = Octopus, Mutect2
-        return [
-         join(workpath, caller, "somatic", "{0}.{1}.filtered.norm.tumor.vcf.gz".format(tumor, caller)) \
-         for caller in somatic_callers
-        ]
+        # No paired normal, return nothing
+        return []
 
 
 # Data processing rules for calling somatic variants
@@ -127,7 +123,13 @@ rule octopus_merge:
     output:
         lsl  = join(workpath, "octopus", "somatic", "{name}.list"),
         raw  = join(workpath, "octopus", "somatic", "{name}.octopus.raw.vcf"),
-        vcf  = join(workpath, "octopus", "somatic", "{name}.octopus.vcf"),
+        vcf  = join(workpath, "octopus", "somatic", "{name}.octopus.unfiltered.vcf"),
+        chroms  = temp(join(workpath, "octopus", "somatic", "{name}.chroms")),
+        starts  = temp(join(workpath, "octopus", "somatic", "{name}.starts")),
+        stops   = temp(join(workpath, "octopus", "somatic", "{name}.stops")),
+        bed     = temp(join(workpath, "octopus", "somatic", "{name}.list.bed")),
+        sortbed = temp(join(workpath, "octopus", "somatic", "{name}.sort.bed")),
+        sortlsl = join(workpath, "octopus", "somatic", "{name}.sort.list"),
     params: 
         genome = config['references']['GENOME'],
         rname  = "octomerge",
@@ -136,11 +138,32 @@ rule octopus_merge:
     threads: 
         int(allocated("threads", "octopus_merge", cluster))
     container: config['images']['genome-seek']
-    envmodules: config['tools']['bcftools']
+    envmodules: config['tools']['bcftools'],
     shell: """
     # Create list of chunks to merge
     find {params.octopath} -iname '{params.tumor}.vcf.gz' \\
         > {output.lsl}
+    
+    # Sort list of chunks to merge
+    awk -F '/' '{{print $(NF-1)}}' {output.lsl} \\
+        | awk -F ':' '{{print $(1)}}' > {output.chroms}
+    awk -F '/' '{{print $(NF-1)}}' {output.lsl} \\
+        | awk -F ':' '{{print $(2)}}' \\
+        | awk -F '-' '{{print $(NF-1)}}' > {output.starts}
+    awk -F '/' '{{print $(NF-1)}}' {output.lsl} \\
+        | awk -F ':' '{{print $(2)}}' \\
+        | awk -F '-' '{{print $(NF)}}' > {output.stops}
+    paste {output.chroms} \\
+        {output.starts} \\
+        {output.stops} \\
+        {output.lsl} \\
+        > {output.bed}
+    bedtools sort \\
+        -i {output.bed} \\
+        -faidx {params.genome}.fai \\
+        > {output.sortbed}
+    cut -f4 {output.sortbed} > {output.sortlsl}
+
     # Merge octopus chunk calls,
     # contains both germline and
     # somatic variants
@@ -148,7 +171,7 @@ rule octopus_merge:
         --threads {threads} \\
         -d exact \\
         -a \\
-        -f {output.lsl} \\
+        -f {output.sortlsl} \\
         -o {output.raw} \\
         -O v
     # Filter Octopus callset for 
@@ -157,6 +180,58 @@ rule octopus_merge:
         > {output.vcf}
     """
 
+rule octopus_filter:
+    """
+    Data-processing step to merge scattered variant calls from Octopus. Octopus 
+    is scattered across genomic intervals or chunks to reduce its overall runtime. 
+    @Input:
+        Somatic variants in VCF format (gather-per-sample-per-chunks)
+    @Output:
+        Per sample somatic variants in VCF format  
+    """
+    input:
+        vcf  = join(workpath, "octopus", "somatic", "{name}.octopus.unfiltered.vcf"),
+    output:
+        vcfa  = join(workpath, "octopus", "somatic", "{name}.octopus.PASS.vcf"),
+        vcfb  = join(workpath, "octopus", "somatic", "{name}.octopus.SOMATIC.vcf"),
+        vcfc  = join(workpath, "octopus", "somatic", "{name}.octopus.SOMATIC_PASS.vcf"),
+        vcfsort = join(workpath, "octopus", "somatic", "{name}.octopus.vcf"),
+    params: 
+        genome = config['references']['GENOME'],
+        rname  = "octofilter",
+        tumor  = "{name}",
+        octopath = join(workpath, "octopus", "somatic", "chunks"),
+        # Building optional argument for paired normal
+        bcftools_filter_i_option = lambda w: 'FMT/FT[1:0]=="PASS"' \
+            if tumor2normal[w.name] else 'FMT/FT[0:0]=="PASS"',
+    threads: 
+        int(allocated("threads", "octopus_filter", cluster))
+    container: config['images']['genome-seek']
+    envmodules: config['tools']['bcftools']
+    shell: """
+    bcftools filter \\
+        -o {output.vcfa} \\
+        -O v \\
+        -i 'FILTER=="PASS"' \\
+        {input.vcf}
+    
+    awk -F '\\t' -v OFS='\\t' \\
+        '( $1 ~ /^#/ ) || ( $8 ~ /SOMATIC/ ) {{print}}' \\
+        {output.vcfa} \\
+        > {output.vcfb}
+    
+    bcftools filter \\
+        -i 'FMT/FT[0:0]=="PASS"' \\
+        {output.vcfb} \\
+        | bcftools filter \\
+            -i '{params.bcftools_filter_i_option}' \\
+            -o {output.vcfc}
+    
+    bcftools sort \\
+        -o {output.vcfsort} \\
+        -O v \\
+        {output.vcfc}
+    """
 
 rule octopus_germline:
     """
@@ -591,11 +666,12 @@ rule strelka:
         indels = join(workpath, "strelka", "{name}", "results", "variants", "somatic.indels.vcf.gz"),
         vcf    = temp(join(workpath, "strelka", "{name}", "{name}.tmp.vcf")),
         header = temp(join(workpath, "strelka", "{name}", "{name}.samples")),
-        final  = join(workpath, "strelka", "somatic", "{name}.strelka.vcf"),
+        rehead  = temp(join(workpath, "strelka", "somatic", "{name}.rehead.vcf")),
     params:
         tmpdir = tmpdir,
         tumor  = '{name}',
         rname  = 'strelka',
+        purple_jar = config['references']['HMFTOOLS_PURPLE_JAR'],
         outdir = join(workpath, "strelka", "{name}"),
         workflow = join(workpath, "strelka", "{name}", "runWorkflow.py"),
         regions  = config['references']['MANTA_CALLREGIONS'],
@@ -663,9 +739,56 @@ rule strelka:
     echo -e "TUMOR\\t{params.tumor}{params.normal_header}" \\
     > {output.header} 
     bcftools reheader \\
-        -o {output.final} \\
+        -o {output.rehead} \\
         -s {output.header} \\
         {output.vcf}
+    """  
+    
+rule strelka_format:
+    """Data-processing step to call somatic mutations with Strelka. This tool is 
+    optimized for rapid clinical analysis of germline variation in small cohorts 
+    and somatic variation in tumor/normal sample pairs. More information about 
+    strelka can be found here: https://github.com/Illumina/strelka
+    @Input:
+        Realigned, recalibrated BAM file for a normal in a TN pair
+    @Output:
+        Per sample VCF of somatic variants
+    """
+    input: 
+        rehead  = join(workpath, "strelka", "somatic", "{name}.rehead.vcf"),
+    output:
+        final  = join(workpath, "strelka", "somatic", "{name}.strelka.vcf"),
+    params:
+        tmpdir = tmpdir,
+        tumor  = '{name}',
+        rname  = 'strelka_format',
+        purple_jar = config['references']['HMFTOOLS_PURPLE_JAR'],
+        outdir = join(workpath, "strelka", "{name}"),
+        workflow = join(workpath, "strelka", "{name}", "runWorkflow.py"),
+        regions  = config['references']['MANTA_CALLREGIONS'],
+        genome   = config['references']['GENOME'],
+        pon      = config['references']['PON'],
+        memory   = allocated("mem", "strelka_format", cluster).rstrip('G'),
+        # Building optional argument for paired normal
+        normal_option = lambda w: "--normalBam {0}.recal.bam".format(
+            join(workpath, "BAM", tumor2normal[w.name])
+        ) if tumor2normal[w.name] else "",
+        # Creating optional reheader for paired normal,
+        # resolves to "\nNORMAL\t${normalName}"
+        normal_header = lambda w: "\\nNORMAL\\t{0}".format(
+            tumor2normal[w.name]
+        ) if tumor2normal[w.name] else "",
+    threads: 
+        max(int(allocated("threads", "strelka_format", cluster))-1, 1)
+    container: config['images']['genome-seek_cnv']
+    envmodules:
+        config['tools']['rlang'],
+    shell: """
+    #Adding AD annotation to VCF
+    java -Xmx{params.memory}g -cp {params.purple_jar} \\
+        com.hartwig.hmftools.purple.tools.AnnotateStrelkaWithAllelicDepth \\
+        -in {input.rehead} \\
+        -out {output.final}
     """
 
 
@@ -726,108 +849,6 @@ rule somatic_selectvar:
         {output.filt}
     """
 
-
-rule somatic_split_tumor:
-    """Data-processing step to post-process vcf file generated by all the
-    somatic callers. This step takes a re-header, filtered, and normalized
-    somatic vcf file and filters the callset to only contain sites in the 
-    tumor sample (needed due to mixed sites in the tumor-normal callset).
-    @Input:
-        Per sample, per caller somatic variants
-    @Output:
-        Per sample, per caller somatic variants only found in the tumor sample
-    """
-    input:
-        vcf = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.vcf"),
-    output:
-        tmp    = temp(join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.tumor.temp.vcf.gz")),
-        header = temp(join(workpath, "{caller}", "somatic", "{name}.{caller}.tumor.samples")),
-        tumor  = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.tumor.vcf.gz"),
-        tbi    = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.tumor.vcf.gz.tbi"),
-    params:
-        rname  = 'tumorsplit',
-        sample = '{name}',
-        rename = '{caller}:{name}',
-        memory = allocated("mem", "somatic_split_tumor", cluster).rstrip('G'),
-    threads: 
-        int(allocated("threads", "somatic_split_tumor", cluster))
-    container: config['images']['genome-seek_somatic']
-    envmodules: config['tools']['bcftools']
-    shell: """
-    # Filter call set to tumor sites
-    bcftools view \\
-        -c1 \\
-        -Oz \\
-        -s '{params.sample}' \\
-        -o {output.tmp} \\
-        {input.vcf}
-    # Renaming sample name in VCF 
-    # to contain caller name
-    echo -e "{params.sample}\\t{params.rename}" \\
-    > {output.header} 
-    bcftools reheader \\
-        -o {output.tumor} \\
-        -s {output.header} \\
-        {output.tmp}
-    # Create an VCF index for intersect
-    bcftools index \\
-        -f \\
-        --tbi \\
-        {output.tumor} 
-    """
-
-
-rule somatic_split_normal:
-    """Data-processing step to post-process vcf file generated by all the
-    somatic callers. This step takes a re-header, filtered, and normalized
-    somatic vcf file and filters the callset to only contain sites in the 
-    normal sample (needed due to mixed sites in the tumor-normal callset).
-    @Input:
-        Per sample, per caller somatic variants
-    @Output:
-        Per sample, per caller somatic variants only found in the normal sample
-    """
-    input:
-        vcf = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.vcf"),
-    output:
-        tmp    = temp(join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.normal.temp.vcf.gz")),
-        header = temp(join(workpath, "{caller}", "somatic", "{name}.{caller}.normal.samples")),
-        normal = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.normal.vcf.gz"),
-        tbi    = join(workpath, "{caller}", "somatic", "{name}.{caller}.filtered.norm.normal.vcf.gz.tbi"),
-    params:
-        rname  = 'normalsplit',
-        sample = lambda w: "{0}".format(tumor2normal[w.name]),
-        rename = lambda w: "{0}:{1}".format(w.caller, tumor2normal[w.name]),
-        memory = allocated("mem", "somatic_split_normal", cluster).rstrip('G'),
-    threads: 
-        int(allocated("threads", "somatic_split_normal", cluster))
-    container: config['images']['genome-seek_somatic']
-    envmodules: config['tools']['bcftools']
-    shell: """
-    # Filter call set to normal sites
-    bcftools view \\
-        --force-samples \\
-        -c1 \\
-        -Oz \\
-        -s '{params.sample}' \\
-        -o {output.tmp} \\
-        {input.vcf}
-    # Renaming sample name in VCF 
-    # to contain caller name
-    echo -e "{params.sample}\\t{params.rename}" \\
-    > {output.header} 
-    bcftools reheader \\
-        -o {output.normal} \\
-        -s {output.header} \\
-        {output.tmp}
-    # Create an VCF index for intersect
-    bcftools index \\
-        -f \\
-        --tbi \\
-        {output.normal}
-    """
-
-
 rule somatic_merge_tumor:
     """Data-processing step to post-process vcf file generated by all the
     somatic callers. This step takes filtered tumor sample callsets from 
@@ -839,51 +860,49 @@ rule somatic_merge_tumor:
         Variants found in at least 2 callers
     """
     input:
-        tumors = get_somatic_merge_tumor,
+        tn_callers = get_somatic_tn_callers,
+        octopus = join(workpath, "octopus", "somatic", "{name}.octopus.filtered.norm.vcf"),
+        mutect2 = join(workpath, "mutect2", "somatic", "{name}.mutect2.filtered.norm.vcf"),
+        # muse = join(workpath, "muse", "somatic", "{name}.muse.filtered.norm.vcf"),
+        # strelka = join(workpath, "strelka", "somatic", "{name}.strelka.filtered.norm.vcf"),
     output:
-        lsl    = join(workpath, "merged", "somatic", "{name}.intersect.lsl"),
-        merged = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.vcf.gz"),
-        tbi = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.vcf.gz.tbi"),
+        merged = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.vcf.gz"),
     params:
         rname  = 'tumormerge',
+        genome = config['references']['GENOME'],
         memory = allocated("mem", "somatic_merge_tumor", cluster).rstrip('G'),
-        isec_dir = join(workpath, "merged", "somatic", "intersect"),
+        tmpdir = tmpdir,
+        # Dynamically update the priority list
+        # based on wether a sample is a tumor-only
+        # or a tumor-normal
+        priority_list = lambda w: "mutect2,octopus,muse,strelka" \
+          if tumor2normal[w.name] else "mutect2,octopus",
+        strelka_option = lambda w: "--variant:strelka {0}.strelka.filtered.norm.vcf".format(
+            join(workpath, "strelka", "somatic", w.name)
+        ) if tumor2normal[w.name] else "",
+        muse_option = lambda w: "--variant:muse {0}.muse.filtered.norm.vcf".format(
+            join(workpath, "muse", "somatic", w.name)
+        ) if tumor2normal[w.name] else "",
     threads: 
         int(allocated("threads", "somatic_merge_tumor", cluster))
     container: config['images']['genome-seek_somatic']
-    envmodules: config['tools']['bcftools']
+    envmodules: config['tools']['gatk3']
     shell: """
-    # Delete previous attempts output
-    # directory to ensure hard restart
-    if [ -d "{params.isec_dir}" ]; then
-        rm -rf "{params.isec_dir}"
-    fi
     # Intersect somatic callset to find
     # variants in at least two callers
-    bcftools isec \\
-        -Oz \\
-        -n+2 \\
-        -c none \\
-        -p {params.isec_dir} \\
-        {input.tumors} 
-    # Create list of files to merge 
-    find {params.isec_dir} \\
-        -name '*.vcf.gz' \\
-        | sort \\
-    > {output.lsl} 
-    # Merge variants found in at 
-    # least two callers 
-    bcftools merge \\
-        -Oz \\
-        -o {output.merged} \\
-        -l {output.lsl}
-    # Create an VCF index for merge
-    bcftools index \\
-        -f \\
-        --tbi \\
-        {output.merged}
-    """
+    if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
+    tmp=$(mktemp -d -p "{params.tmpdir}")
+    trap 'rm -rf "${{tmp}}"' EXIT
 
+    java -Xmx{params.memory}g -Djava.io.tmpdir=${{tmp}} \\
+        -XX:ParallelGCThreads={threads} -jar $GATK_JAR -T CombineVariants \\
+            -R {params.genome} \\
+            --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \\
+            --genotypemergeoption PRIORITIZE \\
+            --rod_priority_list {params.priority_list} \\
+            -o {output.merged} \\
+            --variant:octopus {input.octopus} --variant:mutect2 {input.mutect2} {params.strelka_option} {params.muse_option} 
+    """
 
 rule somatic_sample_maf:
     """Data-processing step to convert the merged somatic calls from 
@@ -897,11 +916,11 @@ rule somatic_sample_maf:
         Annotated, merged MAF file contaning somatic callsets
     """
     input:
-        vcf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.vcf.gz"),
+        vcf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.vcf.gz"),
     output:
-        vcf = temp(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.temp.vcf")),
-        vep = temp(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.temp.vep.vcf")),
-        maf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.maf"),
+        vcf = temp(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.temp.vcf")),
+        vep = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.temp.vep.vcf"),
+        maf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.maf"),
     params:
         rname  = 'samplemaf',
         tumor  = '{name}',
@@ -933,7 +952,8 @@ rule somatic_sample_maf:
         --vep-forks {threads} \\
         --tumor-id {params.tumor} {params.normal_option} \\
         --ncbi-build {params.vep_build} \\
-        --species {params.vep_species}
+        --species {params.vep_species} \\
+        --retain-info set
     """
 
 
@@ -946,7 +966,7 @@ rule somatic_cohort_maf:
         Merged somatic tumor MAF, cohort-level, with all call sets
     """
     input: 
-        mafs = expand(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.maf"), name=tumors),
+        mafs = expand(join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.maf"), name=tumors),
     output: 
         maf  = join(workpath, "merged", "somatic", "cohort_somatic_variants.maf"),
     params:
@@ -1023,7 +1043,7 @@ rule somatic_sample_sigprofiler:
         SigProfiler Sample Portrait Plot (pdf)
     """
     input: 
-        maf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.tumor.maf"),
+        maf = join(workpath, "merged", "somatic", "{name}.merged.filtered.norm.maf"),
     output:
         pdf = join(workpath, "sigprofiler", "sample_portrait_{name}.pdf"),
     params:
diff --git a/workflow/rules/sv.smk b/workflow/rules/sv.smk
index 1f08375..9bd3c9a 100644
--- a/workflow/rules/sv.smk
+++ b/workflow/rules/sv.smk
@@ -73,7 +73,6 @@ rule manta_germline:
         -g {params.memory} 
     """
 
-
 # Somatic SV calling 
 rule manta_somatic:
     """
@@ -102,6 +101,15 @@ rule manta_somatic:
         normal_option = lambda w: "--normalBam {0}.recal.bam".format(
             join(workpath, "BAM", tumor2normal[w.name])
         ) if tumor2normal[w.name] else "",
+        # Building command to create a renamed symlink
+        # called, somaticSV.vcf.gz, when run in tumor-
+        # only mode. Ensures a consistent output file
+        # names regardless of whether a sample has a 
+        # paired normal or not.
+        symlink = lambda w: "ln -sf {0} {1}".format(
+            join(workpath, "MANTA", "somatic", w.name, "results", "variants", "tumorSV.vcf.gz"),
+            join(workpath, "MANTA", "somatic", w.name, "results", "variants", "somaticSV.vcf.gz"),
+        ) if not tumor2normal[w.name] else "",
     threads: int(allocated("threads", "manta_somatic", cluster))
     container: config['images']['genome-seek_sv']
     envmodules: config['tools']['manta']
@@ -124,5 +132,77 @@ rule manta_somatic:
     echo "Starting Manta workflow..."
     {params.workflow} \\
         -j {threads} \\
-        -g {params.memory} 
+        -g {params.memory}
+    {params.symlink}
+    """
+
+# Filter Somatic SV calls
+rule manta_somatic_filter:
+    """
+    Data processing step to call somatic structural variants using manta. 
+    Manta is optimized for optimized for analysis of germline variation 
+    in small sets of individuals and somatic variation in tumor/normal 
+    sample pairs. Manta's Github Repo: https://github.com/Illumina/manta
+    @Input:
+        Single-sample VCF file with called somatic structural variants (scatter)
+    @Output:
+        Filtered single-sample VCF file with called somatic structural variants
+    """
+    input: 
+        vcf  = join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.vcf.gz"),
+    output:
+        samplevcf  = join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.sample.vcf.gz"),
+        flagged    = join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.flagged.vcf.gz"),
+        filtered   = join(workpath, "MANTA", "somatic", "{name}", "results", "variants", "somaticSV.filtered.vcf"),
+    params: 
+        rname       = "manta_somatic_filter",
+        sample      = "{name}",
+        outdir      = join(workpath, "MANTA", "somatic", "{name}"),
+        filtermanta = join(workpath, "workflow", "scripts", "FilterManta.pl"),
+        genome      = config['references']['GENOME'],
+        filter_ref  = config['references']['MANTA_FILTER_CHROMOSEQ_TRANSLOCATION'],
+        memory      = allocated("mem", "manta_somatic_filter", cluster).rstrip('G'),
+    threads: int(allocated("threads", "manta_somatic_filter", cluster))
+    container: config['images']['genome-seek_sv']
+    envmodules: 
+        config['tools']['bcftools'],
+        config['tools']['samtools'],
+        config['tools']['bedtools'],
+        config['tools']['svtools'],
+        config['tools']['minimap2'],
+        config['tools']['perl'],
+    shell: """
+    # Flag and filter SVs based on the
+    # following: read support, contig 
+    # re-mapping, and allele fraction. 
+    # Also removes non-PASS SVs from
+    # somatic SV callset.
+    bcftools view \\
+        -s {params.sample} \\
+        -O z \\
+        -o {output.samplevcf} \\
+        {input.vcf}
+    # Script to filter manta results 
+    # from chromoseq pipeline,
+    # https://github.com/genome/docker-basespace_chromoseq
+    # Needs paths to its depedencies:
+    bedtools_path="$(type -P bedtools)"
+    minimap2_path="$(type -P minimap2)"
+    svtools_path="$(type -P svtools)"
+    echo "Paths of all dependencies... ${{bedtools_path}}:${{minimap2_path}}:${{svtools_path}}"
+    perl {params.filtermanta} \\
+        -r {params.genome} \\
+        -k {params.filter_ref} \\
+        -b "${{bedtools_path}}" \\
+        -p "${{minimap2_path}}" \\
+        -s "${{svtools_path}}" \\
+        -t {params.outdir} \\
+        {output.samplevcf} \\
+        {output.flagged}
+    # Remove non-PASS-ing SVs
+    bcftools view \\
+        -O v \\
+        -i 'FILTER=="PASS"' \\
+        {output.flagged} \\
+    > {output.filtered}
     """
diff --git a/workflow/scripts/FilterManta.pl b/workflow/scripts/FilterManta.pl
new file mode 100644
index 0000000..2108e74
--- /dev/null
+++ b/workflow/scripts/FilterManta.pl
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+
+use List::Util qw(min max);
+use Getopt::Long;
+use strict;
+
+my $svtools = "/opt/conda/envs/python2/bin/svtools";
+my $bedtools = "/usr/local/bin/bedtools";
+my $minimap = "/usr/local/bin/minimap2";
+my $tee = "/usr/bin/tee";
+my $refseq = "/gscuser/dspencer/refdata/hg38/all_sequences.fa";
+my $tmp = "/tmp";
+
+my $slop = 200;
+my $masklevel = 0.8;
+my $PR = 2;
+my $SR = 2;
+my $minSVabund = 5.0;
+my $delcovratio = 0.8;
+my $dupcovratio = 1.3;
+    
+my $minMQ = 0;
+my $fracIdent = 0.95;
+
+my $knownfile = '';
+
+my $usage = <<END;
+$0 -k <knowntrans bedpe> <gzipped vcf> <out vcf name>
+
+  options:
+    -r refseq
+    
+    [ manta hit filtering ] 
+    -m masklevel
+    -pr PR reads
+    -sr SR reads
+    -a min VAF/SVabund
+    -l slop length for known translocations
+    -q min map quality of contig hits
+    -i min fraction identity for contig hits
+
+    [ file/tool paths ]
+    -b bedtools path
+    -s svtools path
+    -p minimap path
+    -t tmpdir 
+
+END
+
+GetOptions("pr=i" => \$PR,
+	   "sr=i" => \$SR,
+	   "a=f" => \$minSVabund,
+	   "b=s" => \$bedtools,
+	   "s=s" => \$svtools,
+	   "p=s" => \$minimap,
+	   "t=s" => \$tmp,
+	   "m=f" => \$masklevel,
+           "r=s" => \$refseq,
+	   "l=i" => \$slop,
+	   "q=i" => \$minMQ,
+	   "i=f" => \$fracIdent,
+	   "k=s" => \$knownfile);
+
+
+die "$usage\n" if ! -e $knownfile || ! -e $ARGV[0] || !$ARGV[1] || ! -e $svtools || ! -e $bedtools || ! -e $minimap || ! -e $refseq || $ARGV[0] !~ /vcf.gz$/;
+
+my $out = $ARGV[1];
+
+#open manta VCF
+open(VCF,"/bin/zcat $ARGV[0] |") || die "Cant open manta file $ARGV[0]";
+
+# create a fastq file
+open(FQ,">$tmp/input.fq") || die "cant make temporary fastq file";
+
+# read in manta records
+while(<VCF>){
+    chomp;
+    next if /^#/;
+    my @F = split("\t",$_);
+
+    # get the contig tag, if it exists, and print sequence to a temp fastq file
+    if ($F[7] =~ /CONTIG=([ACGTNactgn]+)/){
+	my $contig = $1;
+	my $quals = '#' x length($contig);
+	print FQ "\@$F[2]\n$contig\n+\n$quals\n";
+    }
+}
+close VCF;
+close FQ;
+
+my %hits = (); # hash of hits
+my %totalhits = ();
+
+# map fastq file to reference with minimap2 and iterate through records
+open(SAM,"minimap2 -N 50 -p 0.5 --mask-level 0.8 -ax sr $refseq $tmp/input.fq | $tee $tmp/minimap.sam |") || die;
+open(D,">$tmp/minimap.tsv") || die;
+while(<SAM>){
+    chomp;
+
+    next if /^@/;
+    
+    my @F = split("\t",$_);
+
+    $totalhits{$F[0]}++;
+      
+    my $flag = $F[1];
+
+    next if $flag & hex("0x800");  #skip if its a supp alignment
+    
+    next if !/SA:Z/;  # must have a supplementary alignment in the SA tag
+
+    # get supplemental hit
+    /SA:Z:(\S+);/;
+    my @l=split(",",$1); 
+
+    # check strand of primary hit
+    my $pstrand = "+"; 
+    $pstrand = "-" if $flag & hex("0x10"); 
+
+    /NM:i:(\d+)/;
+    my $pnm = $1;
+    
+    # get the alignment coordinates on the read from the cigar string (e.g., positions X through Y are aligned)
+    my $cigar = $F[5];
+    my $st = 0; # align start
+    my $alen = 0; # align length
+    my $qlen = 0; # read length
+    while($cigar =~ /(\d+)([MSHDI])/g){ # get cigar operation
+	my $s = $1;
+	my $o = $2;
+	$qlen += $s if $s ne 'D'; # add to the query length unless its a del
+	$st += $s if ($o =~ /S|H/ and $alen == 0); # change start position iff this is the first operation and its a clip 
+	$alen += $s if ($o =~ /[MI]/); # add to the alignment length if its a M or I
+    }
+    my $pid = 1 - $pnm / $alen;
+    my @p = ($st+1,$st+$alen-1); # make array with (start,end)
+    @p = reverse(map { $qlen - $_ } @p) if ($pstrand eq '-'); # reverse if its a minus strand hit
+
+    map { die "parse cigar failed" if $_ < 0 or $_ > $qlen } @p;
+    
+    # do same for supplemental cigar
+    my $scigar = $l[3];
+    my $snm = $l[5];
+    my $sstrand = $l[2];
+    $st = 0;
+    $alen = 0;
+    $qlen = 0;
+    while($scigar =~ /(\d+)([MSHDI])/g){
+        my $s = $1;
+        my $o = $2;
+        $qlen += $s if $s ne 'D'; # add to the query length unless its a del
+        $st += $s if ($o =~ /S|H/ and $alen == 0); # change start position iff this is the first operation and its a clip
+	$alen += $s if ($o =~ /[MI]/); # add to the alignment length if its a M or I
+    }
+    my $sid = 1 - $snm / $alen;
+    my @s = ($st,$st+$alen-1);
+    @s = reverse(map { $qlen - $_ } @s) if ($sstrand eq '-');
+
+    map { die "parse cigar failed" if $_ < 0 or $_ > $qlen } @s;
+    
+    # calculate MASKLEVEL (1.0 - MASKLEVEL) == fraction of query that overlaps between primary and supplemental hits 
+    my $ov = 0;
+    unless ($s[0] > $p[1] or $p[0] > $s[1]){
+	$ov = (min($p[1],$s[1]) - max($p[0],$s[0])) / min($p[1]-$p[0],$s[1]-$s[0]);
+    }
+
+    die "masklevel failed" if $ov < 0 or $ov > 1;
+    
+    my $mq = ($F[4] < $l[4] ? $F[4] : $l[4]);
+    # skip if masklevel threshold exceeded
+    next if $ov > (1 - $masklevel);
+    
+    next unless $mq >= $minMQ && $pid > $fracIdent && $sid > $fracIdent;
+
+    # store hits
+    push @{$hits{$F[0]}}, [ $F[2],$F[3]-1,$F[3],$l[0],$l[1]-1,$l[1],$F[0],$mq,$pstrand,$sstrand,$F[5] . ";" . $l[3] ];
+    print D join("\t",$F[2],$F[3]-1,$F[3],$l[0],$l[1]-1,$l[1],$F[0],$mq,$pstrand,$sstrand,$F[5] . ";" . $l[3]),"\n";
+}
+close SAM;
+close D;
+
+die "no minimap hits!" if scalar keys %hits == 0;
+
+# get hotspot annotations using bedtools. store in hash by mantaID
+my %knowntrans = ();
+open(BT,"$svtools vcftobedpe -i $ARGV[0] 2> /dev/null | $bedtools pairtopair -is -slop $slop -a stdin -b $knownfile |") || die "cant run bedtools";
+while(<BT>){
+    chomp;
+    my @F = split("\t",$_);
+
+    # get orientation of manta hit
+    my $orientation= 'same';
+    $orientation = 'opposite' if ($F[14] =~ /^[ACTGactg]+\]|\[[ACTGactg]+$/);
+
+    # store name if right orientation
+    if ($F[$#F] eq '.' or ($orientation eq 'same' && $F[$#F] eq $F[$#F-1]) or ($orientation eq 'opposite' && $F[$#F] ne $F[$#F-1])){
+	$knowntrans{$F[12]} = $F[$#F-3];
+	$knowntrans{$F[15]} = $F[$#F-3];
+    }
+}
+close BT;
+
+# outfile
+open(O,">$out") || die;
+
+#open manta VCF again
+open(VCF,"zcat $ARGV[0] |") || die "Cant open manta vcf file $ARGV[0]";
+while(<VCF>){
+    # add info tag and filters
+    if (/^##FILTER/){
+	print O '##INFO=<ID=KNOWNSV,Number=.,Type=String,Description="Known hotspot translocation">',"\n";
+	print O '##INFO=<ID=CONTIGHITS,Number=.,Type=Integer,Description="Number of high-quality primary genomic alignments to manta contig">',"\n";
+	do {
+	    print O;
+	    $_ = <M>;
+	} while(/^##FILTER/);
+	print O '##FILTER=<ID=LowReads,Description="Failed minimum number of PR or SR reads">',"\n";
+	print O '##FILTER=<ID=FailedContig,Description="Mapping of contig failed to reproduce breakends">',"\n";
+	print O '##FILTER=<ID=FailedCov,Description="Coverage of DEL/DUP does not support an SV call">',"\n";
+	next;
+    } elsif (/^#/){
+	print O;
+	next;
+    }
+    
+    # handle records
+    chomp;
+    my @F = split("\t",$_);
+
+    my @fmtk = split(":",$F[8]);
+    my @fmtv = split(":",$F[9]);
+
+    my %fmt = ();
+    map { $fmt{$fmtk[$_]} = $fmtv[$_]; $fmt{$fmtk[$_]} = [ split(",",$fmt{$fmtk[$_]}) ] if $fmt{$fmtk[$_]}=~/,/; } 0..$#fmtk;
+
+    $F[7] =~ /SVTYPE=([^;]+)/;
+    my $svtype = $1;
+    
+    # add hotspot tag, if exists
+    $F[7] .= ";KNOWNSV=" . $knowntrans{$F[2]} if (defined($knowntrans{$F[2]}));	           
+    
+    # add low reads filter if PR and SR reads are low or VAF is too low
+    my $svabund = 0.0;
+    my @filter = ();
+    @filter = split(',',$F[6]) if $F[6] ne 'PASS';
+    
+    if (defined($fmt{PR}) and defined($fmt{SR}) and (max(@{$fmt{PR}}) + max(@{$fmt{SR}})) > 0){
+	$svabund = (($fmt{PR}->[1] + $fmt{SR}->[1]) / ($fmt{PR}->[0] + $fmt{PR}->[1] + $fmt{SR}->[0] + $fmt{SR}->[1])) * 100;
+    }
+    
+    if (!defined($fmt{PR}) or !defined($fmt{SR}) or $fmt{PR}->[1] < $PR or $fmt{SR}->[1] < $SR or $svabund < $minSVabund){
+	push @filter, "LowReads";
+	
+    } elsif (defined($fmt{DHFFC}) and defined($fmt{DHBFC}) and ($svtype =~ /DEL/ and $fmt{DHFFC} > $delcovratio) or ($svtype =~ /DUP/ and $fmt{DHBFC} < $dupcovratio)){
+	push @filter, "FailedCov";
+	
+	# add no contig filter if no contig/imprecise breakends
+    } elsif ($F[7] !~ /CONTIG=/ or !defined($hits{$F[2]})){
+	push @filter, "FailedContig";
+	
+    } else {
+	
+	my $orientation= 'same';
+	my $chr1 = '';
+	my $chr2 = '';
+	my $pos1 = '';
+	my $pos2 = '';
+	
+	# get positions
+	if ($F[2] =~ /DEL|INV|DUP|INS/){
+	    $F[7] =~/END=(\d+)/;
+	    
+	    $chr1 = $F[0];
+	    $pos1 = $F[1];
+	    $chr2 = $F[0];
+	    $pos2 = $1;
+	    
+	} elsif ($F[2] =~ /BND/) {
+	    $F[4] =~/([^\[\]]+):(\d+)/;
+	    
+	    $chr1 = $F[0];
+	    $pos1 = $F[1];
+	    $chr2 = $1;
+	    $pos2 = $2;
+	    
+	    $orientation = 'opposite' if ($F[4] =~ /^[ACTGactg]+\]|\[[ACTGactg]+$/);	    
+	}
+
+	# add hotspot tag, if exists
+	$F[7] .= ";CONTIGHITS=" . $totalhits{$F[2]};
+	
+	# get hits that overlap, allowing for some $slop
+	my $foundhit = 0;
+	foreach my $i (@{$hits{$F[2]}}){
+	    $foundhit = 1 if ((($i->[0] eq $chr1 && $pos1 < $i->[2]+$slop && $pos1 > $i->[1]-$slop &&
+			      $i->[3] eq $chr2 && $pos2 < $i->[5]+$slop && $pos2 > $i->[4]-$slop) ||
+			      ($i->[0] eq $chr2 && $pos2 < $i->[2]+$slop && $pos2 > $i->[1]-$slop &&
+			       $i->[3] eq $chr1 && $pos1 < $i->[5]+$slop && $pos1 > $i->[4]-$slop)) &&
+			       (($orientation eq 'same' && $i->[8] eq $i->[9]) || ($orientation eq 'opposite' && $i->[8] ne $i->[9])));
+		}
+	push @filter, "FailedContig" if $foundhit == 0;
+	
+    }
+    $F[6] = join(';',@filter);
+    $F[6] = "PASS" if scalar @filter == 0;
+    
+    print O join("\t",@F),"\n";
+}
+close VCF;
+close O;    
+
+exit;