Skip to content

Commit

Permalink
Breaking up deepsomatic into its constituent steps
Browse files Browse the repository at this point in the history
  • Loading branch information
skchronicles committed Nov 5, 2024
1 parent d145b39 commit 57b666c
Show file tree
Hide file tree
Showing 4 changed files with 326 additions and 49 deletions.
23 changes: 21 additions & 2 deletions config/cluster/slurm.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,30 @@
"gres": "lscratch:512"
},
"deepsomatic": {
"threads": "24",
"mem": "64G",
"threads": "36",
"mem": "192G",
"time": "1-18:00:00",
"gres": "lscratch:750"
},
"deepsomatic_make_examples": {
"threads": "36",
"mem": "96G",
"time": "1-00:00:00",
"gres": "lscratch:750"
},
"deepsomatic_call_variants": {
"threads": "16",
"mem": "60G",
"partition": "gpu",
"gres": "gpu:a100:1,lscratch:450",
"time": "1-00:00:00"
},
"deepsomatic_postprocess_variants": {
"threads": "4",
"mem": "64G",
"time": "1-00:00:00",
"gres": "lscratch:256"
},
"deepvariant": {
"threads": "18",
"mem": "48G",
Expand Down
15 changes: 15 additions & 0 deletions config/cluster/uge.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,21 @@
"mem": "4G",
"partition": ""
},
"deepsomatic_call_variants": {
"mem": "4G",
"partition": "",
"threads": "8"
},
"deepsomatic_make_examples": {
"mem": "4G",
"partition": "",
"threads": "8"
},
"deepsomatic_postprocess_variants": {
"mem": "8G",
"partition": "",
"threads": "4"
},
"deepvariant": {
"mem": "3G",
"partition": "",
Expand Down
94 changes: 93 additions & 1 deletion workflow/rules/depreciated.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
# Deprecated rules that may still be useful for some projects
def get_normal_sorted_bam(wildcards):
    """
    Input function that resolves the sorted BAM file of the normal
    sample paired with a given tumor sample.
    See config['pairs'] for tumor, normal pairs.
    @param wildcards:
        Wildcards object; only its `name` attribute (the tumor
        sample name) is read to look up the pair in tumor2normal.
    @return:
        Path to "<workpath>/BAM/<normal>.sorted.bam" if the tumor has
        a paired normal, otherwise an empty list (tumor-only mode).
    """
    paired_normal = tumor2normal[wildcards.name]
    if not paired_normal:
        # Tumor-only mode, there is no
        # normal BAM file to add as input
        return []
    # Tumor-normal mode, point to the
    # sorted BAM of the paired normal
    bam_basename = "{0}.sorted.bam".format(paired_normal)
    return join(workpath, "BAM", bam_basename)


# Deprecated germline variant calling rule(s)
rule deepvariant:
Expand Down Expand Up @@ -57,4 +70,83 @@ rule deepvariant:
--output_vcf={output.vcf} \\
--num_shards={threads} \\
--intermediate_results_dir=${{tmp}}
"""
"""

# Deprecated somatic variant calling rule(s)
rule deepsomatic:
"""
Data processing step to call somatic variants using a deep neural
network in tumor-normal pairs. DeepSomatic is an extension of the
deep learning-based variant caller DeepVariant that takes aligned
reads (in BAM or CRAM format) from tumor and normal data, produces
pileup image tensors from them, classifies each tensor using a CNN,
and finally reports somatic variants in a standard VCF or gVCF file.
This rule runs all three steps of the deepsomatic pipeline as one
step: i.e. make_examples, call_variants, and postprocess_variants.
This is not optimal for large-scale projects as it will consume a lot
of resources inefficiently (only the 2nd step in the dv pipeline can
make use of GPU-computing). As such, it is better to run the 1st/3rd
step on a normal compute node and run the 2nd step on a GPU node.
If a tumor has no paired normal in tumor2normal, the normal BAM and
normal sample name options resolve to empty strings (tumor-only mode).
@Input:
Duplicate marked, sorted Tumor-Normal BAM file (scatter)
@Output:
Single-sample VCF file with called somatic variants
"""
input:
tumor = join(workpath, "BAM", "{name}.sorted.bam"),
normal = get_normal_sorted_bam
output:
vcf = join(workpath, "deepsomatic", "somatic", "{name}.deepsomatic.vcf"),
params:
rname = "deepsom",
genome = config['references']['GENOME'],
tmpdir = tmpdir,
# Building option for deepsomatic config, where:
# @WGS = --model_type=WGS
# @WES = --model_type=WES (may be added in future)
dv_model_type = "WGS",
# Tumor sample name, taken from the {name} wildcard
tumor = '{name}',
# Building option for the paired normal sorted bam,
# resolves to an empty string if the tumor has no
# paired normal in tumor2normal
normal_bam_option = lambda w: "--reads_normal={0}.sorted.bam".format(
join(workpath, "BAM", tumor2normal[w.name])
) if tumor2normal[w.name] else "",
# Building option for the normal sample name,
# resolves to an empty string if the tumor has no
# paired normal in tumor2normal
normal_name_option = lambda w: "--sample_name_normal={0}".format(
tumor2normal[w.name]
) if tumor2normal[w.name] else "",
# Thread count comes from the "deepsomatic" entry
# of the cluster config, also used as --num_shards
threads: int(allocated("threads", "deepsomatic", cluster))
container: config['images']['deepsomatic']
envmodules: config['tools']['deepsomatic']
shell: """
# Setups temporary directory for
# intermediate files with built-in
# mechanism for deletion on exit
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
tmp=$(mktemp -d -p "{params.tmpdir}")
trap 'du -sh "${{tmp}}"; rm -rf "${{tmp}}"' EXIT
# Export OpenBLAS variable to
# control the number of threads
# in a thread pool. By setting
# this variable to 1, work is
# done in the thread that ran
# the operation, rather than
# disbatching the work to a
# thread pool. If this option
# is not provided, it can lead
# to nested parallelism.
# See this issue for more info:
# https://github.com/google/deepsomatic/issues/28
export OPENBLAS_NUM_THREADS=1
# Run deepsomatic
run_deepsomatic \\
--model_type={params.dv_model_type} \\
--ref={params.genome} \\
--reads_tumor={input.tumor} {params.normal_bam_option} \\
--sample_name_tumor={params.tumor} {params.normal_name_option} \\
--output_vcf={output.vcf} \\
--num_shards={threads} \\
--intermediate_results_dir=${{tmp}}
"""
Loading

0 comments on commit 57b666c

Please sign in to comment.