Skip to content

Commit

Permalink
Adding refs and rule for hmftools sage
Browse files Browse the repository at this point in the history
  • Loading branch information
skchronicles committed Sep 25, 2024
1 parent b5aa555 commit 21118eb
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
6 changes: 6 additions & 0 deletions config/genome.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@
"HMFTOOLS_AMBER_JAR": "/data/OpenOmics/references/genome-seek/hmftools/amber-3.5.jar",
"HMFTOOLS_COBALT_JAR": "/data/OpenOmics/references/genome-seek/hmftools/cobalt-1.11.jar",
"HMFTOOLS_PURPLE_JAR": "/data/OpenOmics/references/genome-seek/hmftools/purple_v3.2.jar",
"HMFTOOLS_SAGE_JAR": "/data/OpenOmics/references/genome-seek/hmftools/sage_v3.4.1.jar",
"HMFTOOLS_SAGE_REF_VERSION": "38",
"HMFTOOLS_SAGE_HOTSPOTS": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz",
"HMFTOOLS_SAGE_PANEL": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz",
"HMFTOOLS_SAGE_HIGH_CONF": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz",
"HMFTOOLS_SAGE_ENSEMBL_DATA": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/common/ensembl_data/",
"HMFTOOLS_AMBER_LOCI": "/data/OpenOmics/references/genome-seek/hmftools/GermlineHetPon.hg38.vcf.gz",
"HMFTOOLS_GC_PROFILE": "/data/OpenOmics/references/genome-seek/hmftools/GC_profile.hg38.1000bp.cnp",
"HMFTOOLS_DIPLOID": "/data/OpenOmics/references/genome-seek/hmftools/DiploidRegions.hg38.bed.gz",
Expand Down
78 changes: 78 additions & 0 deletions workflow/rules/somatic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ def get_normal_recal_bam(wildcards):
# Runs in tumor-only mode
return []


def get_normal_sorted_bam(wildcards):
    """
    Resolves the sorted BAM file of the normal sample that is
    paired with a tumor sample (wildcards.name).
    See config['pairs'] for tumor, normal pairs.

    Returns the path "<workpath>/BAM/<normal>.sorted.bam" when the
    tumor has a paired normal, otherwise an empty list so that no
    extra input file is added to the calling rule.
    """
    paired_normal = tumor2normal[wildcards.name]
    if not paired_normal:
        # Tumor-only mode, there is no normal BAM to return
        return []
    # Tumor, normal mode, point to the normal's sorted BAM
    bam_name = "{0}.sorted.bam".format(paired_normal)
    return join(workpath, "BAM", bam_name)


def get_normal_pileup_table(wildcards):
"""
Returns a tumor samples paired normal pileup
Expand Down Expand Up @@ -612,6 +627,69 @@ rule gatk_filter_mutect2:
"""


rule hmftools_sage:
    """Data-processing step to call somatic variants in TO and TN
    samples using hmftools sage. HMF Tools is a suite of tools the
    Hartwig Medical Foundation developed to analyze genomic data.
    The paired normal is optional: when a tumor has no normal in
    tumor2normal, the '-reference' and '-reference_bam' options are
    left off the command and sage is run with the tumor alone.
    For more information about hmftools visit:
    https://github.com/hartwigmedical/hmftools
    @Input:
        Sorted BAM file (scatter-per-tumor-sample)
        Sorted BAM file of the paired normal (optional)
    @Output:
        Per sample somatic variants in VCF format
    """
    input:
        tumor  = join(workpath, "BAM", "{name}.sorted.bam"),
        normal = get_normal_sorted_bam
    output:
        vcf = join(workpath, "sage", "somatic", "{name}.sage.vcf"),
    params:
        rname = 'hmfsage',
        tumor = '{name}',
        genome = config['references']['GENOME'],
        # Path to the sage jar file, it is put on the
        # java classpath in the shell command below
        sage_jar = config['references']['HMFTOOLS_SAGE_JAR'],
        ref_version  = config['references']['HMFTOOLS_SAGE_REF_VERSION'],
        hotspots     = config['references']['HMFTOOLS_SAGE_HOTSPOTS'],
        panel        = config['references']['HMFTOOLS_SAGE_PANEL'],
        high_conf    = config['references']['HMFTOOLS_SAGE_HIGH_CONF'],
        ensembl_data = config['references']['HMFTOOLS_SAGE_ENSEMBL_DATA'],
        # For UGE/SGE clusters memory is allocated
        # per cpu, so we must calculate total mem
        # as the product of threads and memory,
        # 1 is subtracted from that product before
        # it is passed to java as its max heap (-Xmx)
        memory = lambda _: int(
            int(allocated("mem", "hmftools_sage", cluster).lower().rstrip('g')) * \
            int(allocated("threads", "hmftools_sage", cluster))
        )-1 if run_mode == "uge" \
        else allocated("mem", "hmftools_sage", cluster).lower().rstrip('g'),
        # Building optional arguments for paired normal,
        # each resolves to an empty string in tumor-only
        # mode so nothing is added to the command
        normal_name = lambda w: "-reference {0}".format(
            tumor2normal[w.name]
        ) if tumor2normal[w.name] else "",
        normal_bam = lambda w: "-reference_bam {0}.sorted.bam".format(
            join(workpath, "BAM", tumor2normal[w.name])
        ) if tumor2normal[w.name] else "",
    threads:
        int(allocated("threads", "hmftools_sage", cluster)),
    container: config['images']['genome-seek_cnv']
    # NOTE(review): this loads the 'rlang' tool module although
    # the shell command below only invokes java, confirm whether
    # it is actually needed for this rule
    envmodules: config['tools']['rlang']
    shell: """
    # Call somatic variants with hmftools
    # Somatic Alterations in Genome (SAGE)
    java -Xmx{params.memory}g -cp {params.sage_jar} \\
        com.hartwig.hmftools.sage.SageApplication \\
            -threads {threads} \\
            -tumor {params.tumor} {params.normal_name} \\
            -tumor_bam {input.tumor} {params.normal_bam} \\
            -ref_genome_version {params.ref_version} \\
            -ref_genome {params.genome} \\
            -hotspots {params.hotspots} \\
            -panel_bed {params.panel} \\
            -high_confidence_bed {params.high_conf} \\
            -ensembl_data_dir {params.ensembl_data} \\
            -output_vcf {output.vcf}
    """


rule muse:
"""Data-processing step to call somatic mutations with MuSE. This tool is
unique in accounting for tumor heterogeneity using a sample-specific error
Expand Down

0 comments on commit 21118eb

Please sign in to comment.