Merge pull request #52 from CCBR/feat_ffpe

Fixes
CCBR · Jun 12, 2024 · 3fce8c6 · 3fce8c6
2 parents 11eeaa0 + 89917c6
commit 3fce8c6
Show file tree

Hide file tree

Showing 17 changed files with 541 additions and 253 deletions.
diff --git a/bin/convertStrelka.py b/bin/convertStrelka.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+import os
+import numpy as np
+import vcfpy
+import sys 
+
+def _tumor_normal_genotypes(ref, alt, info):
+    """Derive standard 0/0, 0/1, 1/1 style genotypes from Strelka2 INFO fields.
+
+    Normal -- NT field (ref, het, hom, conflict)
+    Tumor -- SGT field
+      - For SNVs, specified as e.g. GG->TT for the normal and tumor diploid
+        alleles. These can also represent more complex alleles, in which case
+        we fall back to heterozygous pending longer term inclusion of
+        genotypes in Strelka2 directly
+        (https://github.com/Illumina/strelka/issues/16)
+      - For indels, uses the ref, het, hom convention
+
+    ref: The REF allele from a VCF line
+    alt: A single ALT allele string (the caller passes rec.ALT[0].value)
+    info: The VCF INFO mapping; 'NT' and 'SGT' are read from it with .get()
+
+    Returns a (normal_gt, tumor_gt) tuple of genotype strings.
+    """
+    known_names = {"het", "hom", "ref", "conflict"}
+
+    def name_to_gt(val):
+        # Map a named state to a genotype, ignoring case.
+        name = val.lower()
+        if name == "het":
+            return "0/1"
+        if name == "hom":
+            return "1/1"
+        if name in ("ref", "conflict"):
+            return "0/0"
+        # Non-standard representations, het is our best imperfect representation
+        return "0/1"
+
+    def alleles_to_gt(val):
+        # Keys are upper-cased, so the membership test must upper-case too;
+        # otherwise lower-case alleles in SGT would be silently dropped.
+        gt_indices = {gt.upper(): i for i, gt in enumerate([ref, alt])}
+        tumor_gts = [gt_indices[x.upper()] for x in val if x.upper() in gt_indices]
+        # Named states (het/hom/ref/conflict) can share letters with the
+        # alleles, so they are never interpreted per character, whatever
+        # their case.
+        if not tumor_gts or val.lower() in known_names:
+            return name_to_gt(val)
+        if max(tumor_gts) == 0:
+            return "0/0"
+        if 0 in tumor_gts:
+            return "0/%s" % min(x for x in tumor_gts if x > 0)
+        return "%s/%s" % (min(tumor_gts), max(tumor_gts))
+
+    nt_val = info.get('NT').split("=")[-1]
+    normal_gt = name_to_gt(nt_val)
+    sgt_val = info.get('SGT').split("=")[-1]
+    if not sgt_val:
+        tumor_gt = "0/0"
+    else:
+        # SGT reads "normal->tumor"; only the tumor side is genotyped here.
+        tumor_gt = alleles_to_gt(sgt_val.split("->")[-1])
+    return normal_gt, tumor_gt
+
+
+def _af_annotate_and_filter(in_file,out_file):
+    """Populate FORMAT/AF and FORMAT/GT for every record of a Strelka2 somatic VCF.
+
+    Reads in_file, annotates both samples of each record and writes all
+    records to out_file. No variants are dropped: despite the function name,
+    no allele-fraction threshold is applied here.
+
+    calls[0] is treated as the normal sample and calls[1] as the tumor sample.
+
+    Strelka2 doesn't report exact AF for a variant, however it can be calculated as alt_counts/dp from existing fields:
+    somatic
+      snps:    GT:DP:FDP:SDP:SUBDP:AU:CU:GU:TU                 dp=DP                {ALT}U[0] = alt_counts(tier1,tier2)
+      indels:  GT:DP:DP2:TAR:TIR:TOR:DP50:FDP50:SUBDP50:BCN50  dp=DP                TIR = alt_counts(tier1,tier2)
+    germline
+      snps:    GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL(:PS)       dp=sum(alt_counts)   AD = ref_count,alt_counts
+      indels:  GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL(:PS)             dp=sum(alt_counts)   AD = ref_count,alt_counts
+    Only the two somatic cases are implemented below.
+
+    in_file: path of the VCF to read
+    out_file: path of the annotated VCF to write
+    """
+    def allele_fraction(alt_counts, depth):
+        # alt_counts is (tier1_depth, tier2_depth); tier1 is used.
+        # A zero depth would yield nan/inf, so report AF=0.0 instead.
+        if not depth:
+            return 0.0
+        return round(np.true_divide(alt_counts[0], depth), 5)
+
+    vcf = vcfpy.Reader.from_path(in_file)
+    try:
+        # NOTE(review): this description mentions germline AD/DP and TIR/DPI,
+        # but the code below only handles somatic calls and divides by DP.
+        vcf.header.add_format_line(vcfpy.OrderedDict([
+            ('ID', 'AF'), 
+            ('Description', 'Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), TIR/DPI (somatic indels)'),
+            ('Type','Float'),
+            ('Number', '.')
+        ]))
+        vcf.header.add_format_line(vcfpy.OrderedDict([
+            ('ID', 'GT'), 
+            ('Description', 'Genotype'),
+            ('Type','String'),
+            ('Number', '1')
+        ]))
+        writer = vcfpy.Writer.from_path(out_file, vcf.header)
+        try:
+            for rec in vcf:
+                if rec.is_snv():
+                    # {ALT}U=tier1_depth,tier2_depth
+                    count_key = rec.ALT[0].value + "U"
+                else:
+                    # indels: TIR=tier1_depth,tier2_depth
+                    count_key = 'TIR'
+                alt_counts_n = rec.calls[0].data[count_key]
+                alt_counts_t = rec.calls[1].data[count_key]
+                DP_n = rec.calls[0].data["DP"]
+                DP_t = rec.calls[1].data["DP"]
+                # AF is only added when both samples report a depth.
+                if DP_n is not None and DP_t is not None:
+                    rec.add_format('AF',0)
+                    rec.calls[0].data["AF"] = [allele_fraction(alt_counts_n, DP_n)]
+                    rec.calls[1].data["AF"] = [allele_fraction(alt_counts_t, DP_t)]
+                normal_gt, tumor_gt = _tumor_normal_genotypes(rec.REF,rec.ALT[0].value,rec.INFO)
+                # NOTE(review): the VCF spec wants GT first in FORMAT when
+                # present; confirm where add_format places the new key.
+                rec.add_format('GT',"1/0")
+                rec.calls[0].data["GT"] = normal_gt
+                rec.calls[1].data["GT"] = tumor_gt
+                writer.write_record(rec)
+        finally:
+            # Always close so buffered output is flushed to out_file.
+            writer.close()
+    finally:
+        vcf.close()
+
+if __name__ == '__main__':
+    # Needs at least the input and output VCF paths; exit with a usage
+    # message instead of an IndexError traceback when they are missing.
+    if len(sys.argv) < 3:
+        sys.exit("Usage: %s <input.vcf> <output.vcf>" % os.path.basename(sys.argv[0]))
+    filename = sys.argv[1]
+    outname = sys.argv[2]
+    _af_annotate_and_filter(filename, outname)
+
diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R
@@ -48,15 +48,16 @@ CP.example <- sequenza.fit(seqzdata, mc.cores = n_cores)
 
 ## Sequenza.extract seems to fail if too few mutations
 num_mutations <- unlist(lapply(seqzdata$mutations, nrow))
-chrom_list <- names(num_mutations)[num_mutations > 3]
-## But it might actually be segments, idk?
-#num_segments <- unlist(lapply(seqzdata$segments, nrow))
-#chrom_list <- names(num_mutations)[num_segments > 1]
+chrom_list1 <- names(num_mutations)[num_mutations > 3]
+## Also fails if segments <2
+num_segments <- unlist(lapply(seqzdata$segments, nrow))
+chrom_list2 <- names(num_mutations)[num_segments > 1]
 
+chrom_list <- intersect(chrom_list1,chrom_list2)
 not_included <- setdiff(names(num_mutations), chrom_list)
 print("Printing results...")
 if (length(not_included) > 0) {
-    print("Excluding these chromosomes because of too few mutations...")
+    print("Excluding these chromosomes because of too few mutations and/or segments...")
     print(not_included)
 }
 sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir, chromosome.list=chrom_list)

diff --git a/conf/base.config b/conf/base.config
@@ -31,37 +31,37 @@ process {
         time   = { check_max( 4.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_low {
-        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
         memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 4.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 16    * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 6    * task.attempt, 'cpus'    ) }
         memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 8.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_high {
-        cpus   = { check_max( 32     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 120.GB * task.attempt, 'memory'  ) }
+        cpus   = { check_max( 12     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 16.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_long {
         cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
         memory = { check_max( 16.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 72.h  * task.attempt, 'time'    ) }
+        time   = { check_max( 120.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_high_memory {
         memory = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
     withLabel:process_somaticcaller {
         cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
         memory = { check_max( 64.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
+        time   = { check_max( 120.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_somaticcaller_high {
         cpus   = { check_max( 18     * task.attempt, 'cpus'    ) }
         memory = { check_max( 96.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 72.h   * task.attempt, 'time'    ) }
+        time   = { check_max( 120.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_highmem {
         cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }

diff --git a/conf/containers.config b/conf/containers.config
@@ -6,6 +6,6 @@ params {
         vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0'
         lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1'
         octopus = 'docker://dancooke/octopus:latest'
-        annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:latest:v0.0.1'
+        annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:v0.0.1'
     }
 }
diff --git a/conf/genomes.config b/conf/genomes.config
@@ -7,13 +7,14 @@ params {
             genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
             wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
             intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
+            fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" 
             INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" 
             KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
             KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
             dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
-            gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
+            gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' 
             pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz"    //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
-            kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
+            germline_resource = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz"
             KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
             snpeff_genome = "GRCh38.86"
             snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config"
@@ -30,7 +31,7 @@ params {
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
             //HMFTOOLS
             GENOMEVER = "38"
-            HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz"
+            HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz"
             PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz"
             HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz"
             ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data"
@@ -45,15 +46,15 @@ params {
             genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa"
             genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai"
             bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa"
-            genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict"
+            genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.dict"
             intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed"
-            INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" 
-            KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf"
-            KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
+            INDELREF = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz" 
+            KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/1000G_phase1.indels.hg19.vcf.gz"
+            KNOWNRECAL = '--known-sites /fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.excluding_sites_after_129.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz'
             dbsnp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz"
-            gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
-            pon = "/data/CCBR_Pipeliner/db/PipeDB/lib/GRCh37.noCOSMIC_ClinVar.pon.vcf.gz"   
-            kgp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz"
+            germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz"
+            gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz'
+            pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz"
             KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
             snpeff_genome = "GRCh37.75"
             snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config"
@@ -70,7 +71,7 @@ params {
             chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM']
             //HMFTOOLS
             GENOMEVER = "37"
-            HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz"
+            HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz"
             PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz"
             HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz"
             ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data"
@@ -80,8 +81,6 @@ params {
             DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz'
             ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/'
             DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv'
-            HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz'
-
         }
 
         'mm10' {
@@ -95,9 +94,9 @@ params {
             KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz"
             dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
             pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz"
-            kgp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz"
-            KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
+            germline_resource = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz"
             gnomad= "--germline-resource  /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
+            KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2"
             snpeff_genome = "GRCm38.86"
             snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/snpEff.config"
             snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/"

diff --git a/docker/annotate_cnvsv/Dockerfile b/docker/annotate_cnvsv/Dockerfile
@@ -12,13 +12,18 @@ LABEL maintainer <[email protected]>
 
 # Create Container filesystem specific 
 # working directory and opt directories 
+RUN apt-get update \
+ && apt-get -y upgrade \
+ && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    tclsh
+
 WORKDIR /opt2 
 
 ###Create AnnotSV 
-RUN wget https://github.com/lgmgeo/AnnotSV/archive/refs/tags/v3.3.6.tar.gz \
-    && tar -xvzf /opt2/v3.3.6.tar.gz \
-    && rm /opt2/v3.3.6.tar.gz
-ENV PATH="/opt2/AnnotSV-3.3.6/bin:$PATH"
+RUN wget https://github.com/lgmgeo/AnnotSV/archive/refs/tags/v3.4.2.tar.gz \
+    && tar -xvzf /opt2/v3.4.2.tar.gz \
+    && rm /opt2/v3.4.2.tar.gz
+ENV PATH="/opt2/AnnotSV-3.4.2/bin:$PATH"
 
 ##ClassifyCNV
 ##Update the resources for ClassifyCNV

diff --git a/docker/annotate_cnvsv/meta.yml b/docker/annotate_cnvsv/meta.yml
@@ -1,4 +1,4 @@
 dockerhub_namespace: dnousome
 image_name: ccbr_annotate_cnvsv
-version: v0.0.1
+version: v0.0.2
 container: "$(dockerhub_namespace)/$(image_name):$(version)"