forked from epi2me-labs/wf-basecalling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnextflow_schema.json
407 lines (407 loc) · 21.8 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/epi2me-labs/wf-basecalling/master/nextflow_schema.json",
"title": "epi2me-labs/wf-basecalling",
"workflow_title": "Basecalling workflow",
"description": "Helper workflow for signal processing and primary data analysis of Oxford Nanopore Technologies' reads.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo.tar.gz",
"aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo/aws.nextflow.config",
"url": "https://github.com/epi2me-labs/wf-basecalling",
"type": "object",
"definitions": {
"input": {
"title": "Input Options",
"type": "object",
"fa_icon": "fas fa-arrow-right",
"description": "Parameters for finding and handling input data for analysis.",
"properties": {
"input": {
"title": "Input directory",
"type": "string",
"format": "directory-path",
"description": "Directory containing FAST5 (or POD5) signal for basecalling.",
"help_text": "This directory will be searched recursively. All FAST5 or POD5 files (depending on which extension you select in the Basecalling Options) in this directory or any subdirectory (no matter how deep) will be basecalled."
},
"ref": {
"title": "Reference FASTA file",
"type": "string",
"format": "file-path",
"description": "Optional reference FASTA file to align basecalled reads to.",
"help_text": "Without a reference, basecalls are output to unaligned CRAM. When using a reference, take care to retain this FASTA file as the output CRAM file cannot be read without the reference it was aligned to."
}
},
"required": [
"input"
]
},
"output": {
"title": "Output Options",
"type": "object",
"fa_icon": "fas fa-arrow-left",
"description": "Parameters for saving and naming workflow outputs.",
"properties": {
"out_dir": {
"title": "Output directory",
"type": "string",
"default": "output",
"format": "directory-path",
"description": "Directory for output of all files."
},
"sample_name": {
"type": "string",
"default": "SAMPLE",
"description": "Sample name to prefix file names of workflow outputs."
},
"fastq_only": {
"title": "Output FASTQ not CRAM",
"type": "boolean",
"default": false,
"description": "Output unaligned FASTQ instead of unaligned CRAM.",
"help_text": "FASTQ can only be output when a reference has not been provided. Aligned output will always be written to CRAM even if fastq_only is set."
},
"output_bam": {
"title": "Output BAM not CRAM",
"type": "boolean",
"default": false,
"description": "Output unaligned BAM instead of unaligned CRAM.",
"help_text": "Some downstream applications do not yet support CRAM and will require a BAM file. Enabling this option will output BAM instead of CRAM. You should only use this option if you know that it is needed. Output files will be larger than the corresponding CRAM files that would have been written if this option was not enabled."
}
}
},
"basecalling_options": {
"title": "Basecalling options",
"type": "object",
"fa_icon": "fas fa-gear",
"description": "Basecalling model selection.",
"help_text": "This section contains options that should be checked before basecalling.",
"properties": {
"basecaller_cfg": {
"title": "Basecaller configuration",
"type": "string",
"description": "Name of the model to use for converting signal.",
"help_text": "Required for basecalling. The model list only shows models that are compatible with this workflow.",
"enum": [
"rna002_70bps_fast@v3",
"rna002_70bps_hac@v3"
]
},
"duplex": {
"title": "Duplex calling",
"type": "boolean",
"description": "Run the basecaller in duplex mode.",
"default": false,
"help_text": "By default, the workflow conducts simplex basecalling. If you used a chemistry and flowcell combination that supported duplex reads, you should switch this option on. Currently, duplex basecalling is not compatible with modified basecalling. Additionally, duplex basecalling within this workflow is reliant on internal optimisations to organise input files for better duplex rates, which is not possible when using streaming basecalling; therefore duplex combined with the watch_path option could lead to lower duplex rates."
},
"remora_cfg": {
"title": "Modified basecalling model configuration",
"type": "string",
"description": "Name of the model to use for calling modified bases.",
"help_text": "Required for calling modified bases while basecalling. The model list only shows models that are compatible with this workflow.",
"enum": [
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v1",
"[email protected]_5mC_5hmC@v1",
"[email protected]_6mA@v2",
"[email protected]_5mCG_5hmCG@v2",
"[email protected]_5mCG_5hmCG@v1",
"[email protected]_5mC_5hmC@v1",
"[email protected]_6mA@v2",
"[email protected]_5mCG_5hmCG@v0",
"[email protected]_5mCG_5hmCG@v0",
"[email protected]_5mCG_5hmCG@v0",
"[email protected]_m6A_DRACH@v1"
]
},
"dorado_ext": {
"title": "Input file format",
"type": "string",
"description": "File extension for Dorado inputs.",
"help_text": "Set this to fast5 if you have not converted your fast5 to pod5. It is recommended to [convert existing fast5 files to pod5 for use with Dorado](https://github.com/nanoporetech/pod5-file-format/blob/master/python/README.md#pod5-convert-from-fast5).",
"default": "pod5",
"enum": [
"fast5",
"pod5"
]
}
},
"allOf": [
{
"required": [
"dorado_ext"
]
},
{
"oneOf": [
{
"required": [
"basecaller_cfg"
]
},
{
"required": [
"basecaller_model_path"
]
}
]
}
]
},
"advanced_basecalling_options": {
"title": "Advanced basecalling options",
"type": "object",
"fa_icon": "fas fa-gears",
"description": "Basecaller performance and load settings.",
"help_text": "These basecalling options do not typically need to be changed.",
"properties": {
"output_pod5": {
"type": "boolean",
"title": "Output converted POD5",
"default": false,
"description": "Save the converted POD5 when running in duplex with FAST5 inputs.",
"help_text": "Dorado duplex only supports POD5 input. The workflow will automatically convert FAST5 input to POD5 when duplex calling. By default, converted POD5 are deleted to save disk space. Enabling this option will make the workflow output converted POD5 files to a subfolder within the output directory."
},
"qscore_filter": {
"type": "number",
"default": 10,
"description": "Mean qscore by which to filter reads. Inclusive such that reads with score >= qscore_filter are kept.",
"help_text": "The mean qscore of reads is calculated by dorado, rounded to an integer, and stored as a tag in dorado's SAM output. The pipeline separates reads into pass and fail categories based on this SAM tag."
},
"basecaller_chunk_size": {
"type": "number",
"default": 25,
"description": "Number of input files to basecall in each basecalling process.",
"minimum": 1,
"hidden": true
},
"cuda_device": {
"type": "string",
"default": "cuda:all",
"description": "GPU device to use for basecalling [cuda:all].",
"help_text": "For local execution this can be used to pin GPU tasks to one (or more) specific GPU devices. Use cuda:all to use all available GPU devices, or cuda:idx[,idx,...] where idx is an index number(s) of GPU device(s) to use."
},
"basecaller_model_path": {
"type": "string",
"format": "directory-path",
"description": "Override the named basecalling model with a custom basecalling model.",
"help_text": "For typical use, users should set --basecaller_cfg which will use a named model from inside the container. Experimental or custom basecallers will not be available in the container and can be loaded from the host with --basecaller_model_path."
},
"remora_model_path": {
"type": "string",
"format": "directory-path",
"description": "Override the named remora model with a custom remora model.",
"help_text": "For typical use, users should set --remora_cfg which will use a named model from inside the container. Experimental or custom models will not be available in the container and can be loaded from the host with --remora_model_path."
},
"basecaller_basemod_threads": {
"type": "number",
"default": 2,
"description": "Number of threads to use for base modification calling.",
"help_text": "You must set this to > 0 when using a modbase aware model. Modbase calling does not require much additional CPU and should be set carefully when using GPU servers with a small number of CPUs per GPU."
},
"basecaller_args": {
"type": "string",
"description": "Additional command line arguments to pass to the basecaller process."
},
"experimental": {
"type": "boolean",
"default": false,
"description": "Enable experimental and unsupported features.",
"hidden": true,
"help_text": "Use of this option is required to enable hidden, experimental features. No ordinary user should need to enable this option unless instructed to do so by a member of the EPI2ME team."
},
"use_bonito": {
"type": "boolean",
"default": false,
"description": "Use bonito rather than dorado for basecalling. Highly experimental, many options will not work when this is enabled.",
"help_text": "This experimental option is not recommended for normal use and is not supported.",
"hidden": true
},
"bonito_cfg": {
"title": "Basecaller configuration",
"default": "[email protected]",
"type": "string",
"description": "Name of the model to use for converting signal.",
"help_text": "Required for basecalling. The model list only shows models that are compatible with this workflow.",
"enum": [
],
"hidden": true
}
},
"dependencies": {
"output_pod5": [
"duplex",
"dorado_ext"
]
}
},
"multiprocessing_options": {
"title": "Multiprocessing Options",
"type": "object",
"fa_icon": "far fa-gauge-high",
"description": "Advanced options for configuring the performance of specific workflow processes.",
"help_text": "These options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
"properties": {
"ubam_map_threads": {
"type": "integer",
"default": 8,
"description": "Set max number of threads to use for aligning reads from uBAM (limited by config executor cpus)"
},
"ubam_sort_threads": {
"type": "integer",
"default": 3,
"description": "Set max number of threads to use for sorting and indexing aligned reads from uBAM (limited by config executor cpus)"
},
"ubam_bam2fq_threads": {
"type": "integer",
"default": 1,
"description": "Set max number of threads to use for uncompressing uBAM and generating FASTQ for alignment (limited by config executor cpus)"
},
"merge_threads": {
"type": "integer",
"default": 4,
"description": "Set max number of threads to use for merging BAM files (limited by config executor cpus)"
},
"stats_threads": {
"type": "integer",
"default": 4,
"description": "Set max number of threads to use for getting stats from output files (limited by config executor cpus)"
}
}
},
"real_time_analysis_options": {
"title": "Real Time Analysis Options",
"type": "object",
"description": "Options relating to the non-default real-time streaming workflow.",
"default": "",
"properties": {
"watch_path": {
"title": "Basecall while sequencing ('watch path')",
"type": "boolean",
"default": false,
"description": "Enable to continuously watch the input directory for new input files. Reads will be analysed as they appear.",
"help_text": "This option enables the use of Nextflow's directory watching feature to constantly monitor input directories for new files. As soon as files are written by an external process Nextflow will begin analysing these files. The workflow will accumulate data over time to produce an updating report. Real time analysis of duplex data may lead to lower duplex rates than what would have been obtained by running basecalling after sequencing."
},
"read_limit": {
"type": "integer",
"description": "Stop processing data when a particular number of reads have been analysed.",
"help_text": "By default the workflow will run indefinitely when using the real time watch path option. This will set the upper bound on the number of reads that will be analysed before the workflow is automatically stopped and no more data is analysed."
}
}
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "far fa-question-circle",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"default": false,
"hidden": true
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"default": false,
"hidden": true
},
"disable_ping": {
"type": "boolean",
"default": false,
"description": "Disable workflow ping."
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/basecalling_options"
},
{
"$ref": "#/definitions/real_time_analysis_options"
},
{
"$ref": "#/definitions/advanced_basecalling_options"
},
{
"$ref": "#/definitions/multiprocessing_options"
},
{
"$ref": "#/definitions/generic_options"
}
],
"properties": {
"aws_image_prefix": {
"type": "string",
"hidden": true
},
"aws_queue": {
"type": "string",
"hidden": true
},
"monochrome_logs": {
"type": "boolean"
},
"validate_params": {
"type": "boolean",
"default": true
},
"show_hidden_params": {
"type": "boolean"
}
},
"resources": {
"recommended": {
"cpus": 64,
"memory": "256GB"
},
"minimum": {
"cpus": 8,
"memory": "64GB"
},
"run_time": "Variable depending on coverage, genome size, model of choice and GPU model.",
"arm_support": false
},
"docs": {
"intro": "## Introduction\n\nThis workflow introduces users to [`Dorado`](https://github.com/nanoporetech/dorado),\nwhich is now our standard basecaller. `dorado` is still under active development and\nwill be kept updated as new releases are made. We strongly encourage users to check\nthe CHANGELOG for breaking changes.\n",
"links": "## Useful links\n\n* [nextflow](https://www.nextflow.io/)\n* [docker](https://www.docker.com/products/docker-desktop)\n* [singularity](https://sylabs.io/singularity/)\n* [dorado](https://github.com/nanoporetech/dorado/)\n"
}
}