nextflow_schema.json

{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "https://raw.githubusercontent.com/epi2me-labs/wf-human-variation/master/nextflow_schema.json",
    "title": "epi2me-labs/wf-human-variation",
    "workflow_title": "Human variation workflow",
    "description": "SNV, SV and CNV calling, modified base calling, and STR genotyping of human samples.",
    "demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-variation/wf-human-variation-demo.tar.gz",
    "aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-variation/wf-human-variation-demo/aws.nextflow.config",
    "url": "https://github.com/epi2me-labs/wf-human-variation",
    "type": "object",
    "definitions": {
        "workflow": {
            "title": "Workflow Options",
            "type": "object",
            "fa_icon": "fas fa-arrow-right",
            "description": "Select which sub-workflows of wf-human-variation you wish to run. Parameters for each sub-workflow can be changed using the sections below.",
            "properties": {
                "sv": {
                    "title": "SV: Structural variants",
                    "type": "boolean",
                    "description": "Call for structural variants.",
                    "help_text": "If this option is selected, structural variant calling will be carried out using Sniffles2.",
                    "default": false
                },
                "snp": {
                    "title": "SNP: Small variants",
                    "type": "boolean",
                    "description": "Call for small variants",
                    "help_text": "If this option is selected, small variant calling will be carried out using Clair3.",
                    "default": false
                },
                "cnv": {
                    "title": "CNV: Copy number variants",
                    "type": "boolean",
                    "description": "Call for copy number variants.",
                    "help_text": "If this option is selected, copy number variant calling will be carried out with either Spectre (default) or QDNAseq. To use QDNAseq instead of Spectre, use the option `--use_qdnaseq`. Spectre is only compatible with genome build hg38, and if QDNAseq is used, it is only compatible with genome builds hg37 and hg38.",
                    "default": false
                },
                "str": {
                    "title": "STR: Short tandem repeat expansions",
                    "type": "boolean",
                    "description": "Enable Straglr to genotype STR expansions.",
                    "help_text": "If this option is selected, genotyping of STR expansions will be carried out using Straglr. This sub-workflow is only compatible with genome build hg38.",
                    "default": false
                },
                "mod": {
                    "title": "MOD: Modified bases aggregation",
                    "type": "boolean",
                    "description": "Enable output of modified calls to a bedMethyl file [requires input BAM with Ml and Mm tags]",
                    "help_text": "This option is automatically selected and aggregation of modified calls with be carried out using modkit if Ml and Mm tags are found. Disable this option to prevent output of a bedMethyl file.",
                    "default": false
                }
            },
            "anyOf": [
                {
                    "required": [
                        "sv"
                    ]
                },
                {
                    "required": [
                        "snp"
                    ]
                },
                {
                    "required": [
                        "cnv"
                    ]
                },
                {
                    "required": [
                        "str"
                    ]
                },
                {
                    "required": [
                        "mod"
                    ]
                }
            ]
        },
        "input": {
            "title": "Main options",
            "type": "object",
            "fa_icon": "fas fa-arrow-right",
            "description": "Primary parameters for the data analysis.",
            "properties": {
                "sample_name": {
                    "type": "string",
                    "default": "SAMPLE",
                    "description": "Sample name to be displayed in workflow outputs.",
                    "help_text": ""
                },
                "bam": {
                    "title": "Input: BAM or CRAM file",
                    "type": "string",
                    "format": "file-path",
                    "description": "Path to a BAM (or CRAM) containing aligned or unaligned reads.",
                    "help_text": "The workflow currently accepts a single BAM or CRAM file."
                },
                "ref": {
                    "title": "Reference file",
                    "type": "string",
                    "format": "file-path",
                    "description": "Path to a reference FASTA file.",
                    "help_text": "Reference against which to compare reads for variant calling."
                },
                "old_ref": {
                    "title": "CRAM reference file for realignment",
                    "type": "string",
                    "format": "file-path",
                    "description": "Reference FASTA file for CRAM input (only required if the CRAM requires realignment)",
                    "help_text": "You do not need to provide this unless the workflow specifically asks you to. If your input CRAM headers do not match the metadata of the input reference, the workflow will assume you want to realign your reads to the new input reference. CRAM files are compressed using the reference, so the read sequences cannot be realigned without the old reference."
                },
                "basecaller_cfg": {
                    "title": "Basecaller configuration",
                    "type": "string",
                    "description": "Name of the model to use for selecting a small variant calling model.",
                    "help_text": "Required for small variant calling. The basecaller configuration is used to automatically select the appropriate small variant calling model. The model list shows all models that are compatible for small variant calling with this workflow. You should select 'custom' to override the basecaller_cfg with clair3_model_path.",
                    "default": "dna_r10.4.1_e8.2_400bps_sup@v4.1.0",
                    "enum": [
                        "dna_r10.4.1_e8.2_260bps_fast@v4.1.0",
                        "dna_r10.4.1_e8.2_260bps_hac@v4.1.0",
                        "dna_r10.4.1_e8.2_260bps_sup@v4.1.0",
                        "dna_r10.4.1_e8.2_400bps_fast@v4.1.0",
                        "dna_r10.4.1_e8.2_400bps_fast@v4.2.0",
                        "dna_r10.4.1_e8.2_400bps_fast@v4.3.0",
                        "dna_r10.4.1_e8.2_400bps_hac@v4.1.0",
                        "dna_r10.4.1_e8.2_400bps_hac@v4.3.0",
                        "dna_r10.4.1_e8.2_400bps_sup@v4.1.0",
                        "dna_r10.4.1_e8.2_400bps_sup@v4.3.0",
                        "dna_r9.4.1_e8_fast@v3.4",
                        "dna_r9.4.1_e8_hac@v3.3",
                        "dna_r9.4.1_e8_sup@v3.3",
                        "dna_r9.4.1_e8_sup@v3.6",
                        "custom",
                        "dna_r10.4.1_e8.2_260bps_hac@v4.0.0",
                        "dna_r10.4.1_e8.2_260bps_sup@v4.0.0",
                        "dna_r10.4.1_e8.2_400bps_hac",
                        "dna_r10.4.1_e8.2_400bps_hac@v3.5.2",
                        "dna_r10.4.1_e8.2_400bps_hac@v4.0.0",
                        "dna_r10.4.1_e8.2_400bps_hac@v4.2.0",
                        "dna_r10.4.1_e8.2_400bps_hac_prom",
                        "dna_r10.4.1_e8.2_400bps_sup@v3.5.2",
                        "dna_r10.4.1_e8.2_400bps_sup@v4.0.0",
                        "dna_r10.4.1_e8.2_400bps_sup@v4.2.0",
                        "dna_r9.4.1_450bps_hac",
                        "dna_r9.4.1_450bps_hac_prom"
                    ]
                },
                "bam_min_coverage": {
                    "type": "number",
                    "default": 20,
                    "description": "Minimum read coverage required to run analysis.",
                    "hidden": false
                },
                "depth_window_size": {
                    "type": "number",
                    "default": 25000,
                    "description": "Coverage window size in bp.",
                    "help_text": "This options specify the window size to use when computing the coverage along the genome.",
                    "hidden": true
                },
                "bed": {
                    "title": "Target region BED file",
                    "type": "string",
                    "format": "file-path",
                    "description": "An optional BED file enumerating regions to process for variant calling.",
                    "help_text": ""
                },
                "annotation": {
                    "type": "boolean",
                    "description": "SnpEff annotation.",
                    "help_text": "If this option is unselected, VCFs will not be annotated with SnpEff.",
                    "default": true
                },
                "phased": {
                    "type": "boolean",
                    "default": false,
                    "description": "Perform phasing.",
                    "help_text": "This option enables phasing of SV, SNP and modifications, depending on which sub-workflow has been chosen; see [README](README.md#9-phasing-variants) for more details."
                },
                "include_all_ctgs": {
                    "type": "boolean",
                    "default": false,
                    "description": "Call for variants on all sequences in the reference, otherwise small and structural variants will only be called on chr{1..22,X,Y,MT}.",
                    "help_text": "Enabling this option will call for variants on all contigs of the input reference sequence. Typically this option is not required as standard human reference sequences contain decoy and unplaced contigs that are usually omitted for the purpose of variant calling. This option might be useful for non-standard reference sequence databases."
                },
                "out_dir": {
                    "title": "Output directory",
                    "type": "string",
                    "default": "output",
                    "format": "directory-path",
                    "description": "Directory for output of all workflow results."
                }
            },
            "required": [
                "ref",
                "bam"
            ]
        },
        "sv_options": {
            "title": "Structural variant calling options",
            "type": "object",
            "description": "Options specific to the SV calling subworkflow.",
            "properties": {
                "tr_bed": {
                    "title": "Tandem repeat BED file",
                    "type": "string",
                    "format": "file-path",
                    "description": "Input BED file containing tandem repeat annotations for the reference genome.",
                    "help_text": "Providing a tandem repeat BED can improve calling in repetitive regions. An appropriate tandem repeat BED can be downloaded for your reference genome [from the Sniffles2 repository](https://github.com/fritzsedlazeck/Sniffles/tree/master/annotations)."
                },
                "cluster_merge_pos": {
                    "type": "number",
                    "default": 150,
                    "description": "Maximum merging distance for insertions and deletions on the same read and cluster in non-repeat regions.",
                    "hidden": true
                },
                "min_sv_length": {
                    "type": "number",
                    "default": 30,
                    "description": "Minimum structural variant size called by Sniffles2.",
                    "hidden": true,
                    "minimum": 30
                },
                "sniffles_args": {
                    "type": "string",
                    "description": "Additional command line arguments to pass to the Sniffles2 process",
                    "hidden": true,
                    "help_text": "The additional command line arguments will be passed directly to Sniffles2; ensure to use the right commands for the version and from command line provide them as follow: `--sniffles_args=\"--non-germline\"`."
                }
            }
        },
        "sv_benchmark_options": {
            "title": "Structural variant benchmarking options",
            "type": "object",
            "description": "Options specific to automated benchmarking of the SV calling subworkflow.",
            "properties": {
                "sv_benchmark": {
                    "type": "boolean",
                    "description": "Benchmark called structural variants.",
                    "help_text": "If this option is selected, automated benchmarking of structural variant calls will be carried out using Truvari.",
                    "default": false
                },
                "sv_benchmark_vcf": {
                    "type": "string",
                    "format": "file-path",
                    "description": "Override truthset VCF for benchmarking structural variants.",
                    "help_text": "This option will use a custom VCF in place of the one bundled with the default 'NIST_SVs_Integration_v0.6' truth set. A Tabix index is required and will be expected to have the same path as the VCF with a '.tbi' extension.",
                    "hidden": true
                },
                "sv_benchmark_bed": {
                    "type": "string",
                    "format": "file-path",
                    "description": "Override truthset BED for benchmarking structural variants.",
                    "help_text": "This option will use a custom BED in place of the one bundled with the default 'NIST_SVs_Integration_v0.6' truth set.",
                    "hidden": true
                }
            },
            "dependencies": {
                "sv_benchmark": [
                    "sv"
                ]
            }
        },
        "snp_options": {
            "title": "Small variant calling options",
            "type": "object",
            "description": "Options specific to the small variant calling subworkflow.",
            "properties": {
                "use_longphase": {
                    "type": "boolean",
                    "default": true,
                    "description": "Use longphase for final phasing of output variants (experimental).",
                    "hidden": true,
                    "help_text": "The default behaviour of the workflow is to use longphase for the variant phasing. If set to false, the workflow will execute [whatshap](https://whatshap.readthedocs.io/) instead."
                },
                "clair3_model_path": {
                    "type": "string",
                    "format": "directory-path",
                    "description": "Clair3 model directory.",
                    "help_text": "The workflow will attempt to map the basecalling model used to a suitable Clair3 model. You can override this by providing the path to a model with this option instead. Models can be obtained from [this page](https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/index.html).",
                    "hidden": true
                },
                "ref_pct_full": {
                    "type": "number",
                    "default": 0.1,
                    "description": "Expected percentage of low quality 0/0 variants called in the pileup mode for full-alignment mode calling.",
                    "hidden": true
                },
                "var_pct_full": {
                    "type": "number",
                    "default": 0.7,
                    "description": "Expected percentage of low quality 0/1 and 1/1 variants called in the pileup mode for full-alignment mode calling.",
                    "hidden": true
                },
                "snp_min_af": {
                    "type": "number",
                    "default": 0.08,
                    "description": "Minimum SNP AF required for a candidate variant.",
                    "hidden": true
                },
                "indel_min_af": {
                    "type": "number",
                    "default": 0.15,
                    "description": "Minimum Indel AF required for a candidate variant.",
                    "hidden": true
                },
                "vcf_fn": {
                    "type": "string",
                    "default": "EMPTY",
                    "description": "Candidate sites VCF file input, variants will only be called at the sites in the VCF file if provided.",
                    "hidden": true
                },
                "min_cov": {
                    "type": "number",
                    "default": 2,
                    "description": "Minimum coverage required to call a variant (experimental).",
                    "hidden": true
                },
                "min_mq": {
                    "type": "number",
                    "default": 5,
                    "description": "Reads with mapping quality < min_mq are filtered (experimental).",
                    "hidden": true
                },
                "min_qual": {
                    "type": "number",
                    "default": 2,
                    "description": "Variants with >=min_qual will be marked 'PASS', or 'LowQual' otherwise, optional.",
                    "hidden": true
                },
                "min_contig_size": {
                    "type": "number",
                    "default": 0,
                    "description": "Contigs with contig size < min_contig_size are filtered (experimental).",
                    "hidden": true
                },
                "ctg_name": {
                    "type": "string",
                    "description": "The name of the sequence to be processed.",
                    "hidden": true
                },
                "refine_snp_with_sv": {
                    "type": "boolean",
                    "hidden": true,
                    "default": true,
                    "description": "Refine SNP calls using calls from the SV subworkflow.",
                    "help_text": "By default when the SNP and SV subworkflows are both selected, the workflow will use the results of the SV subworkflow to refine the SNP calls. Disabling this option will stop the use of SV calls to refine low-coverage variant zygosity and variant phase to avoid impossible overlaps (e.g. homozygous SNPs falling in a large deletion called by Sniffles)."
                }
            }
        },
        "cnv_options": {
            "title": "Copy number variant calling options",
            "type": "object",
            "description": "Options related to the copy number variant subworkflow.",
            "properties": {
                "use_qdnaseq": {
                    "type": "boolean",
                    "default": false,
                    "description": "Use QDNAseq for CNV calling.",
                    "help_text": "Set this to true to use QDNASeq for CNV calling instead of Spectre. QDNAseq is better suited to shorter reads such as those generated from adaptive sampling experiments."
                },
                "qdnaseq_bin_size": {
                    "type": "integer",
                    "default": 500,
                    "description": "Bin size for QDNAseq in kbp.",
                    "help_text": "Pre-computed bin annotations are available for a range of bin sizes. Larger sizes reduce noise, however this may result in reduced sensitivity.",
                    "enum": [
                        1,
                        5,
                        10,
                        15,
                        30,
                        50,
                        100,
                        500,
                        1000
                    ]
                }
            }
        },
        "mod_options": {
            "title": "Modified base calling options",
            "type": "object",
            "description": "Options related to the modified bases aggregation subworkflow.",
            "properties": {
                "force_strand": {
                    "title": "Force strand",
                    "type": "boolean",
                    "default": false,
                    "description": "Require modkit to call strand-aware modifications.",
                    "help_test": "By default strand calls are collapsed (strand reported as '.'). Enabling this will force stranding to be considered when calling modifications, creating one output per modification per strand and the report will be tabulated by both modification and strand."
                },
                "modkit_args": {
                    "title": "Modkit additional arguments",
                    "type": "string",
                    "description": "The additional options for modkit.",
                    "hidden": true,
                    "help_text": "This is an advanced option to allow running modkit with custom settings. The arguments specified in this option will fully override all options set by the workflow. To provide custom arguments to `modkit` from command line proceed as follow: `--modkit_args=\"--preset traditional\"`"
                }
            }
        },
        "str_options": {
            "title": "Short tandem repeat expansion genotyping options",
            "type": "object",
            "description": "Options related to the STR subworkflow.",
            "properties": {
                "sex": {
                    "title": "Sample sex",
                    "type": "string",
                    "default": "female",
                    "description": "Sex (male or female) to be passed to Straglr-genotype.",
                    "help_text": "The sex determines how many calls will be obtained for all repeats on chrX. Defaults to female if not specified.",
                    "enum": [
                        "male",
                        "female"
                    ]
                }
            }
        },
        "advanced_options": {
            "title": "Advanced Options",
            "type": "object",
            "fa_icon": "far fa-question-circle",
            "description": "Advanced options for configuring processes inside the workflow.",
            "help_text": "These advanced options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
            "properties": {
                "depth_intervals": {
                    "type": "boolean",
                    "default": false,
                    "description": "Output a bedGraph file with entries for each genomic interval featuring homogeneous depth.",
                    "help_text": "The output [bedGraph](https://genome.ucsc.edu/goldenPath/help/bedgraph.html) file will have an entry for each genomic interval in which all positions have the same alignment depth. By default this workflow outputs summary depth information from your aligned reads. Per-base depth outputs are slower to generate but may be required for some downstream applications."
                },
                "GVCF": {
                    "type": "boolean",
                    "default": false,
                    "description": "Enable to output a gVCF file in addition to the VCF outputs (experimental).",
                    "help_text": "By default the the workflow outputs a VCF file containing only records where a variant has been detected. Enabling this option will output additionally a gVCF with records spanning all reference positions regardless of whether a variant was detected in the sample."
                },
                "downsample_coverage": {
                    "type": "boolean",
                    "default": false,
                    "description": "Downsample the coverage to along the genome.",
                    "help_text": "This options will trigger a downsampling of the read alignments to the target coverage specified by --downsample_coverage_target. Downsampling will make the workflow run faster but could lead to non-deterministic variant calls."
                },
                "downsample_coverage_target": {
                    "type": "number",
                    "default": 60,
                    "description": "Average coverage or reads to use for the analyses.",
                    "help_text": "This options will set the target coverage for the downsampling stage, if downsampling has been enabled."
                },
                "downsample_coverage_margin": {
                    "type": "number",
                    "default": 1.1,
                    "description": "Downsample if the bam effective coverage / target coverage if greater than this value.",
                    "help_text": "By default, if the coverage of the input alignments is within 1.1x the target coverage, downsampling will not be performed even if it was enabled. This is to avoid triggering the downsampling of alignments when coverage is already close to the target coverage.",
                    "hidden": true
                },
                "output_separate_phased": {
                    "type": "boolean",
                    "default": false,
                    "hidden": true,
                    "description": "Keep separate phasing files.",
                    "help_text": "This option enables to save individually phased SV and SNP VCF files even when `--phased --sv --snv` are provided."
                }
            }
        },
        "multiprocessing_options": {
            "title": "Multiprocessing Options",
            "type": "object",
            "fa_icon": "far fa-gauge-high",
            "description": "Options for configuring the common processes across the different subworkflows.",
            "help_text": "These options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
            "properties": {
                "threads": {
                    "type": "integer",
                    "default": 4,
                    "description": "Set max number of threads to use for more intense processes (limited by config executor cpus)"
                },
                "ubam_map_threads": {
                    "type": "integer",
                    "default": 8,
                    "description": "Set max number of threads to use for aligning reads from uBAM (limited by config executor cpus)"
                },
                "ubam_sort_threads": {
                    "type": "integer",
                    "default": 3,
                    "description": "Set max number of threads to use for sorting and indexing aligned reads from uBAM (limited by config executor cpus)"
                },
                "ubam_bam2fq_threads": {
                    "type": "integer",
                    "default": 1,
                    "description": "Set max number of threads to use for uncompressing uBAM and generating FASTQ for alignment (limited by config executor cpus)"
                },
                "merge_threads": {
                    "type": "integer",
                    "default": 4,
                    "description": "Set max number of threads to use for merging alignment files (limited by config executor cpus)"
                },
                "modkit_threads": {
                    "type": "integer",
                    "default": 4,
                    "description": "Total number of threads to use in modkit modified base calling (limited by config executor cpus)"
                }
            }
        },
        "misc": {
            "title": "Miscellaneous Options",
            "type": "object",
            "description": "Everything else.",
            "default": "",
            "properties": {
                "disable_ping": {
                    "type": "boolean",
                    "default": false,
                    "description": "Enable to prevent sending a workflow ping."
                },
                "help": {
                    "type": "boolean",
                    "default": false,
                    "description": "Display help text.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true
                },
                "version": {
                    "type": "boolean",
                    "default": false,
                    "description": "Display version and exit.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/definitions/workflow"
        },
        {
            "$ref": "#/definitions/input"
        },
        {
            "$ref": "#/definitions/snp_options"
        },
        {
            "$ref": "#/definitions/sv_options"
        },
        {
            "$ref": "#/definitions/advanced_options"
        },
        {
            "$ref": "#/definitions/mod_options"
        },
        {
            "$ref": "#/definitions/cnv_options"
        },
        {
            "$ref": "#/definitions/str_options"
        },
        {
            "$ref": "#/definitions/sv_benchmark_options"
        },
        {
            "$ref": "#/definitions/multiprocessing_options"
        },
        {
            "$ref": "#/definitions/misc"
        }
    ],
    "properties": {
        "aws_image_prefix": {
            "type": "string",
            "hidden": true
        },
        "aws_queue": {
            "type": "string",
            "hidden": true
        },
        "monochrome_logs": {
            "type": "boolean"
        },
        "validate_params": {
            "type": "boolean",
            "default": true
        },
        "show_hidden_params": {
            "type": "boolean"
        }
    },
    "resources": {
        "recommended": {
            "cpus": 32,
            "memory": "128GB"
        },
        "minimum": {
            "cpus": 12,
            "memory": "32GB"
        },
        "run_time": "Variable depending on whether it is targeted sequencing or whole genome sequencing, as well as coverage and the individual analyses requested. For instance, a 90X human sample run (options: `--snp --sv --mod --str --cnv --phased --sex male`) takes less than 8h with recommended resources.",
        "arm_support": false
    }
}