forked from epi2me-labs/wf-human-variation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnextflow_schema.json
604 lines (604 loc) · 30.4 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/epi2me-labs/wf-human-variation/master/nextflow_schema.json",
"title": "epi2me-labs/wf-human-variation",
"workflow_title": "Human variation workflow",
"description": "SNV, SV and CNV calling, modified base calling, and STR genotyping of human samples.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-variation/wf-human-variation-demo.tar.gz",
"aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-variation/wf-human-variation-demo/aws.nextflow.config",
"url": "https://github.com/epi2me-labs/wf-human-variation",
"type": "object",
"definitions": {
"workflow": {
"title": "Workflow Options",
"type": "object",
"fa_icon": "fas fa-arrow-right",
"description": "Select which sub-workflows of wf-human-variation you wish to run. Parameters for each sub-workflow can be changed using the sections below.",
"properties": {
"sv": {
"title": "SV: Structural variants",
"type": "boolean",
"description": "Call for structural variants.",
"help_text": "If this option is selected, structural variant calling will be carried out using Sniffles2.",
"default": false
},
"snp": {
"title": "SNP: Small variants",
"type": "boolean",
"description": "Call for small variants",
"help_text": "If this option is selected, small variant calling will be carried out using Clair3.",
"default": false
},
"cnv": {
"title": "CNV: Copy number variants",
"type": "boolean",
"description": "Call for copy number variants.",
"help_text": "If this option is selected, copy number variant calling will be carried out with either Spectre (default) or QDNAseq. To use QDNAseq instead of Spectre, use the option `--use_qdnaseq`. Spectre is only compatible with genome build hg38, and if QDNAseq is used, it is only compatible with genome builds hg37 and hg38.",
"default": false
},
"str": {
"title": "STR: Short tandem repeat expansions",
"type": "boolean",
"description": "Enable Straglr to genotype STR expansions.",
"help_text": "If this option is selected, genotyping of STR expansions will be carried out using Straglr. This sub-workflow is only compatible with genome build hg38.",
"default": false
},
"mod": {
"title": "MOD: Modified bases aggregation",
"type": "boolean",
"description": "Enable output of modified calls to a bedMethyl file [requires input BAM with Ml and Mm tags]",
"help_text": "This option is automatically selected and aggregation of modified calls with be carried out using modkit if Ml and Mm tags are found. Disable this option to prevent output of a bedMethyl file.",
"default": false
}
},
"anyOf": [
{
"required": [
"sv"
]
},
{
"required": [
"snp"
]
},
{
"required": [
"cnv"
]
},
{
"required": [
"str"
]
},
{
"required": [
"mod"
]
}
]
},
"input": {
"title": "Main options",
"type": "object",
"fa_icon": "fas fa-arrow-right",
"description": "Primary parameters for the data analysis.",
"properties": {
"sample_name": {
"type": "string",
"default": "SAMPLE",
"description": "Sample name to be displayed in workflow outputs.",
"help_text": ""
},
"bam": {
"title": "Input: BAM or CRAM file",
"type": "string",
"format": "file-path",
"description": "Path to a BAM (or CRAM) containing aligned or unaligned reads.",
"help_text": "The workflow currently accepts a single BAM or CRAM file."
},
"ref": {
"title": "Reference file",
"type": "string",
"format": "file-path",
"description": "Path to a reference FASTA file.",
"help_text": "Reference against which to compare reads for variant calling."
},
"old_ref": {
"title": "CRAM reference file for realignment",
"type": "string",
"format": "file-path",
"description": "Reference FASTA file for CRAM input (only required if the CRAM requires realignment)",
"help_text": "You do not need to provide this unless the workflow specifically asks you to. If your input CRAM headers do not match the metadata of the input reference, the workflow will assume you want to realign your reads to the new input reference. CRAM files are compressed using the reference, so the read sequences cannot be realigned without the old reference."
},
"basecaller_cfg": {
"title": "Basecaller configuration",
"type": "string",
"description": "Name of the model to use for selecting a small variant calling model.",
"help_text": "Required for small variant calling. The basecaller configuration is used to automatically select the appropriate small variant calling model. The model list shows all models that are compatible for small variant calling with this workflow. You should select 'custom' to override the basecaller_cfg with clair3_model_path.",
"default": "[email protected]",
"enum": [
"custom",
"dna_r10.4.1_e8.2_400bps_hac",
"dna_r10.4.1_e8.2_400bps_hac_prom",
"dna_r9.4.1_450bps_hac",
"dna_r9.4.1_450bps_hac_prom"
]
},
"bam_min_coverage": {
"type": "number",
"default": 20,
"description": "Minimum read coverage required to run analysis.",
"hidden": false
},
"depth_window_size": {
"type": "number",
"default": 25000,
"description": "Coverage window size in bp.",
"help_text": "This options specify the window size to use when computing the coverage along the genome.",
"hidden": true
},
"bed": {
"title": "Target region BED file",
"type": "string",
"format": "file-path",
"description": "An optional BED file enumerating regions to process for variant calling.",
"help_text": ""
},
"annotation": {
"type": "boolean",
"description": "SnpEff annotation.",
"help_text": "If this option is unselected, VCFs will not be annotated with SnpEff.",
"default": true
},
"phased": {
"type": "boolean",
"default": false,
"description": "Perform phasing.",
"help_text": "This option enables phasing of SV, SNP and modifications, depending on which sub-workflow has been chosen; see [README](README.md#9-phasing-variants) for more details."
},
"include_all_ctgs": {
"type": "boolean",
"default": false,
"description": "Call for variants on all sequences in the reference, otherwise small and structural variants will only be called on chr{1..22,X,Y,MT}.",
"help_text": "Enabling this option will call for variants on all contigs of the input reference sequence. Typically this option is not required as standard human reference sequences contain decoy and unplaced contigs that are usually omitted for the purpose of variant calling. This option might be useful for non-standard reference sequence databases."
},
"out_dir": {
"title": "Output directory",
"type": "string",
"default": "output",
"format": "directory-path",
"description": "Directory for output of all workflow results."
}
},
"required": [
"ref",
"bam"
]
},
"sv_options": {
"title": "Structural variant calling options",
"type": "object",
"description": "Options specific to the SV calling subworkflow.",
"properties": {
"tr_bed": {
"title": "Tandem repeat BED file",
"type": "string",
"format": "file-path",
"description": "Input BED file containing tandem repeat annotations for the reference genome.",
"help_text": "Providing a tandem repeat BED can improve calling in repetitive regions. An appropriate tandem repeat BED can be downloaded for your reference genome [from the Sniffles2 repository](https://github.com/fritzsedlazeck/Sniffles/tree/master/annotations)."
},
"cluster_merge_pos": {
"type": "number",
"default": 150,
"description": "Maximum merging distance for insertions and deletions on the same read and cluster in non-repeat regions.",
"hidden": true
},
"min_sv_length": {
"type": "number",
"default": 30,
"description": "Minimum structural variant size called by Sniffles2.",
"hidden": true,
"minimum": 30
},
"sniffles_args": {
"type": "string",
"description": "Additional command line arguments to pass to the Sniffles2 process",
"hidden": true,
"help_text": "The additional command line arguments will be passed directly to Sniffles2; ensure to use the right commands for the version and from command line provide them as follow: `--sniffles_args=\"--non-germline\"`."
}
}
},
"sv_benchmark_options": {
"title": "Structural variant benchmarking options",
"type": "object",
"description": "Options specific to automated benchmarking of the SV calling subworkflow.",
"properties": {
"sv_benchmark": {
"type": "boolean",
"description": "Benchmark called structural variants.",
"help_text": "If this option is selected, automated benchmarking of structural variant calls will be carried out using Truvari.",
"default": false
},
"sv_benchmark_vcf": {
"type": "string",
"format": "file-path",
"description": "Override truthset VCF for benchmarking structural variants.",
"help_text": "This option will use a custom VCF in place of the one bundled with the default 'NIST_SVs_Integration_v0.6' truth set. A Tabix index is required and will be expected to have the same path as the VCF with a '.tbi' extension.",
"hidden": true
},
"sv_benchmark_bed": {
"type": "string",
"format": "file-path",
"description": "Override truthset BED for benchmarking structural variants.",
"help_text": "This option will use a custom BED in place of the one bundled with the default 'NIST_SVs_Integration_v0.6' truth set.",
"hidden": true
}
},
"dependencies": {
"sv_benchmark": [
"sv"
]
}
},
"snp_options": {
"title": "Small variant calling options",
"type": "object",
"description": "Options specific to the small variant calling subworkflow.",
"properties": {
"use_longphase": {
"type": "boolean",
"default": true,
"description": "Use longphase for final phasing of output variants (experimental).",
"hidden": true,
"help_text": "The default behaviour of the workflow is to use longphase for the variant phasing. If set to false, the workflow will execute [whatshap](https://whatshap.readthedocs.io/) instead."
},
"clair3_model_path": {
"type": "string",
"format": "directory-path",
"description": "Clair3 model directory.",
"help_text": "The workflow will attempt to map the basecalling model used to a suitable Clair3 model. You can override this by providing the path to a model with this option instead. Models can be obtained from [this page](https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/index.html).",
"hidden": true
},
"ref_pct_full": {
"type": "number",
"default": 0.1,
"description": "Expected percentage of low quality 0/0 variants called in the pileup mode for full-alignment mode calling.",
"hidden": true
},
"var_pct_full": {
"type": "number",
"default": 0.7,
"description": "Expected percentage of low quality 0/1 and 1/1 variants called in the pileup mode for full-alignment mode calling.",
"hidden": true
},
"snp_min_af": {
"type": "number",
"default": 0.08,
"description": "Minimum SNP AF required for a candidate variant.",
"hidden": true
},
"indel_min_af": {
"type": "number",
"default": 0.15,
"description": "Minimum Indel AF required for a candidate variant.",
"hidden": true
},
"vcf_fn": {
"type": "string",
"default": "EMPTY",
"description": "Candidate sites VCF file input, variants will only be called at the sites in the VCF file if provided.",
"hidden": true
},
"min_cov": {
"type": "number",
"default": 2,
"description": "Minimum coverage required to call a variant (experimental).",
"hidden": true
},
"min_mq": {
"type": "number",
"default": 5,
"description": "Reads with mapping quality < min_mq are filtered (experimental).",
"hidden": true
},
"min_qual": {
"type": "number",
"default": 2,
"description": "Variants with >=min_qual will be marked 'PASS', or 'LowQual' otherwise, optional.",
"hidden": true
},
"min_contig_size": {
"type": "number",
"default": 0,
"description": "Contigs with contig size < min_contig_size are filtered (experimental).",
"hidden": true
},
"ctg_name": {
"type": "string",
"description": "The name of the sequence to be processed.",
"hidden": true
},
"refine_snp_with_sv": {
"type": "boolean",
"hidden": true,
"default": true,
"description": "Refine SNP calls using calls from the SV subworkflow.",
"help_text": "By default when the SNP and SV subworkflows are both selected, the workflow will use the results of the SV subworkflow to refine the SNP calls. Disabling this option will stop the use of SV calls to refine low-coverage variant zygosity and variant phase to avoid impossible overlaps (e.g. homozygous SNPs falling in a large deletion called by Sniffles)."
}
}
},
"cnv_options": {
"title": "Copy number variant calling options",
"type": "object",
"description": "Options related to the copy number variant subworkflow.",
"properties": {
"use_qdnaseq": {
"type": "boolean",
"default": false,
"description": "Use QDNAseq for CNV calling.",
"help_text": "Set this to true to use QDNASeq for CNV calling instead of Spectre. QDNAseq is better suited to shorter reads such as those generated from adaptive sampling experiments."
},
"qdnaseq_bin_size": {
"type": "integer",
"default": 500,
"description": "Bin size for QDNAseq in kbp.",
"help_text": "Pre-computed bin annotations are available for a range of bin sizes. Larger sizes reduce noise, however this may result in reduced sensitivity.",
"enum": [
1,
5,
10,
15,
30,
50,
100,
500,
1000
]
}
}
},
"mod_options": {
"title": "Modified base calling options",
"type": "object",
"description": "Options related to the modified bases aggregation subworkflow.",
"properties": {
"force_strand": {
"title": "Force strand",
"type": "boolean",
"default": false,
"description": "Require modkit to call strand-aware modifications.",
"help_test": "By default strand calls are collapsed (strand reported as '.'). Enabling this will force stranding to be considered when calling modifications, creating one output per modification per strand and the report will be tabulated by both modification and strand."
},
"modkit_args": {
"title": "Modkit additional arguments",
"type": "string",
"description": "The additional options for modkit.",
"hidden": true,
"help_text": "This is an advanced option to allow running modkit with custom settings. The arguments specified in this option will fully override all options set by the workflow. To provide custom arguments to `modkit` from command line proceed as follow: `--modkit_args=\"--preset traditional\"`"
}
}
},
"str_options": {
"title": "Short tandem repeat expansion genotyping options",
"type": "object",
"description": "Options related to the STR subworkflow.",
"properties": {
"sex": {
"title": "Sample sex",
"type": "string",
"default": "female",
"description": "Sex (male or female) to be passed to Straglr-genotype.",
"help_text": "The sex determines how many calls will be obtained for all repeats on chrX. Defaults to female if not specified.",
"enum": [
"male",
"female"
]
}
}
},
"advanced_options": {
"title": "Advanced Options",
"type": "object",
"fa_icon": "far fa-question-circle",
"description": "Advanced options for configuring processes inside the workflow.",
"help_text": "These advanced options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
"properties": {
"depth_intervals": {
"type": "boolean",
"default": false,
"description": "Output a bedGraph file with entries for each genomic interval featuring homogeneous depth.",
"help_text": "The output [bedGraph](https://genome.ucsc.edu/goldenPath/help/bedgraph.html) file will have an entry for each genomic interval in which all positions have the same alignment depth. By default this workflow outputs summary depth information from your aligned reads. Per-base depth outputs are slower to generate but may be required for some downstream applications."
},
"GVCF": {
"type": "boolean",
"default": false,
"description": "Enable to output a gVCF file in addition to the VCF outputs (experimental).",
"help_text": "By default the the workflow outputs a VCF file containing only records where a variant has been detected. Enabling this option will output additionally a gVCF with records spanning all reference positions regardless of whether a variant was detected in the sample."
},
"downsample_coverage": {
"type": "boolean",
"default": false,
"description": "Downsample the coverage to along the genome.",
"help_text": "This options will trigger a downsampling of the read alignments to the target coverage specified by --downsample_coverage_target. Downsampling will make the workflow run faster but could lead to non-deterministic variant calls."
},
"downsample_coverage_target": {
"type": "number",
"default": 60,
"description": "Average coverage or reads to use for the analyses.",
"help_text": "This options will set the target coverage for the downsampling stage, if downsampling has been enabled."
},
"downsample_coverage_margin": {
"type": "number",
"default": 1.1,
"description": "Downsample if the bam effective coverage / target coverage if greater than this value.",
"help_text": "By default, if the coverage of the input alignments is within 1.1x the target coverage, downsampling will not be performed even if it was enabled. This is to avoid triggering the downsampling of alignments when coverage is already close to the target coverage.",
"hidden": true
},
"output_separate_phased": {
"type": "boolean",
"default": false,
"hidden": true,
"description": "Keep separate phasing files.",
"help_text": "This option enables to save individually phased SV and SNP VCF files even when `--phased --sv --snv` are provided."
}
}
},
"multiprocessing_options": {
"title": "Multiprocessing Options",
"type": "object",
"fa_icon": "far fa-gauge-high",
"description": "Options for configuring the common processes across the different subworkflows.",
"help_text": "These options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
"properties": {
"threads": {
"type": "integer",
"default": 4,
"description": "Set max number of threads to use for more intense processes (limited by config executor cpus)"
},
"ubam_map_threads": {
"type": "integer",
"default": 8,
"description": "Set max number of threads to use for aligning reads from uBAM (limited by config executor cpus)"
},
"ubam_sort_threads": {
"type": "integer",
"default": 3,
"description": "Set max number of threads to use for sorting and indexing aligned reads from uBAM (limited by config executor cpus)"
},
"ubam_bam2fq_threads": {
"type": "integer",
"default": 1,
"description": "Set max number of threads to use for uncompressing uBAM and generating FASTQ for alignment (limited by config executor cpus)"
},
"merge_threads": {
"type": "integer",
"default": 4,
"description": "Set max number of threads to use for merging alignment files (limited by config executor cpus)"
},
"modkit_threads": {
"type": "integer",
"default": 4,
"description": "Total number of threads to use in modkit modified base calling (limited by config executor cpus)"
}
}
},
"misc": {
"title": "Miscellaneous Options",
"type": "object",
"description": "Everything else.",
"default": "",
"properties": {
"disable_ping": {
"type": "boolean",
"default": false,
"description": "Enable to prevent sending a workflow ping."
},
"help": {
"type": "boolean",
"default": false,
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"version": {
"type": "boolean",
"default": false,
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/workflow"
},
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/snp_options"
},
{
"$ref": "#/definitions/sv_options"
},
{
"$ref": "#/definitions/advanced_options"
},
{
"$ref": "#/definitions/mod_options"
},
{
"$ref": "#/definitions/cnv_options"
},
{
"$ref": "#/definitions/str_options"
},
{
"$ref": "#/definitions/sv_benchmark_options"
},
{
"$ref": "#/definitions/multiprocessing_options"
},
{
"$ref": "#/definitions/misc"
}
],
"properties": {
"aws_image_prefix": {
"type": "string",
"hidden": true
},
"aws_queue": {
"type": "string",
"hidden": true
},
"monochrome_logs": {
"type": "boolean"
},
"validate_params": {
"type": "boolean",
"default": true
},
"show_hidden_params": {
"type": "boolean"
}
},
"resources": {
"recommended": {
"cpus": 32,
"memory": "128GB"
},
"minimum": {
"cpus": 12,
"memory": "32GB"
},
"run_time": "Variable depending on whether it is targeted sequencing or whole genome sequencing, as well as coverage and the individual analyses requested. For instance, a 90X human sample run (options: `--snp --sv --mod --str --cnv --phased --sex male`) takes less than 8h with recommended resources.",
"arm_support": false
}
}