Skip to content

Commit

Permalink
Merge pull request #22 from LouisLeNezet/validation
Browse files Browse the repository at this point in the history
Add validation step
  • Loading branch information
LouisLeNezet authored Apr 24, 2024
2 parents 7addfc7 + a89168e commit 411233e
Show file tree
Hide file tree
Showing 70 changed files with 2,327 additions and 3,291 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co
- Test impute and test sim works
- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5)
- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs for the reference panel and automatic renaming available
- [#22](https://github.com/nf-core/phaseimpute/pull/22) - Add validation step for concordance analysis. Input channels changed to match input steps. Outdir folder organised by steps. Modules config organised by subworkflows.
- [#26](https://github.com/nf-core/phaseimpute/pull/26) - Added QUILT method

### `Fixed`
Expand Down
14 changes: 7 additions & 7 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": ["id"]
},
"bam": {
"file": {
"type": "string",
"pattern": "^\\S+\\.bam$",
"errorMessage": "BAM file must be provided, cannot contain spaces and must have extension '.bam'"
"pattern": "^\\S+\\.(bam|(vcf|bcf)(\\.gz)?)$",
"errorMessage": "BAM, VCF or BCF file must be provided, cannot contain spaces and must have extension '.bam' or '.vcf', '.bcf' with optional '.gz' extension"
},
"bai": {
"errorMessage": "BAI file must be provided, cannot contain spaces and must have extension '.bai'",
"index": {
"errorMessage": "Input file index must be provided, cannot contain spaces and must have extension '.bai', '.tbi' or '.csi'",
"type": "string",
"pattern": "^\\S+\\.bai$"
"pattern": "^\\S+\\.(bai|tbi|csi)$"
}
},
"required": ["sample", "bam", "bai"]
"required": ["sample", "file", "index"]
}
}
115 changes: 0 additions & 115 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,119 +34,4 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Simulation workflow
// SAMTOOLS_VIEW as used inside BAM_REGION: no extra arguments are passed
// (the empty list joins to an empty string); outputs are prefixed
// "<meta.id>_R<meta.region>".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:SAMTOOLS_VIEW' {
ext.args = [
].join(' ')
ext.prefix = { "${meta.id}_R${meta.region}" }
}
// SAMTOOLS_VIEW as used inside BAM_DOWNSAMPLE: likewise no extra arguments;
// outputs are prefixed "<meta.id>_D<meta.depth>".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' {
ext.args = [
].join(' ')
ext.prefix = { "${meta.id}_D${meta.depth}" }
}

// Panel preparation workflow
// VIEW_VCF_REGION inside VCF_REGION: arguments are joined into a single
// space-separated string; outputs are prefixed "<meta.id>_<meta.region>".
// NOTE(review): "--output-type z" presumably requests bgzipped VCF output - confirm
// against the wrapped tool.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_REGION:VIEW_VCF_REGION' {
ext.args = [
"--output-type z",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_${meta.region}" }
}

// BCFTOOLS_ANNOTATE inside VCF_CHR_CHECK:VCF_CHR_RENAME: outputs are prefixed
// "<meta.id>_chrrename".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' {
ext.args = [
"-Oz",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_chrrename" }
}

// VIEW_VCF_SNPS inside GET_PANEL: outputs are prefixed "<meta.id>_SPNS".
// NOTE(review): "-m 2 -M 2 -v snps" presumably restricts to biallelic SNPs - confirm.
// NOTE(review): the "_SPNS" suffix looks like a typo for "_SNPS"; it is part of the
// published file name, so it is left as is here.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GET_PANEL:VIEW_VCF_SNPS' {
ext.args = [
"-m 2",
"-M 2",
"-v snps",
"--output-type z",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_SPNS" }
}
// BCFTOOLS_NORM inside GET_PANEL: "-m" and "-any" are separate list entries, so the
// joined argument string is "-m -any --no-version". Outputs are prefixed "<meta.id>_norm".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GET_PANEL:BCFTOOLS_NORM' {
ext.args = [
"-m",
"-any",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_norm" }
}
// VIEW_VCF_SITES inside GET_PANEL: same arguments as VIEW_VCF_SNPS plus "-G";
// outputs are prefixed "<meta.id>_SITES".
// NOTE(review): "-G" presumably drops genotype columns (sites-only output) - confirm.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GET_PANEL:VIEW_VCF_SITES' {
ext.args = [
"-G",
"-m 2",
"-M 2",
"-v snps",
"--output-type z",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_SITES" }
}
// BCFTOOLS_QUERY inside GET_PANEL: the format string sits in a double-quoted Groovy
// string, so \t and \n become a real tab and newline inside the single-quoted shell
// argument. Outputs are prefixed "<meta.id>_SITES_TSV".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GET_PANEL:BCFTOOLS_QUERY' {
ext.args = [
"-f'%CHROM\t%POS\t%REF,%ALT\n'",
].join(' ')
ext.prefix = { "${meta.id}_SITES_TSV" }
}
// TABIX_TABIX inside GET_PANEL: shares the "<meta.id>_SITES_TSV" prefix with BCFTOOLS_QUERY.
// NOTE(review): "-s1 -b2 -e2" presumably names column 1 as the sequence and column 2 as
// both start and end position - confirm.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GET_PANEL:TABIX_TABIX' {
ext.args = [
"-s1",
"-b2",
"-e2"
].join(' ')
ext.prefix = { "${meta.id}_SITES_TSV" }
}
// BEDTOOLS_MAKEWINDOWS inside VCF_PHASE_SHAPEIT5: outputs are prefixed "<meta.id>_chunks".
// NOTE(review): "-w 60000 -s 40000" presumably means 60 kb windows with a 40 kb step - confirm.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:BEDTOOLS_MAKEWINDOWS' {
ext.args = [
'-w 60000',
'-s 40000'
].join(' ')
ext.prefix = { "${meta.id}_chunks" }
}
// BCFTOOLS_MPILEUP inside GL_INPUT: two argument sets are defined (ext.args and
// ext.args2); no ext.prefix is set in this selector.
// NOTE(review): presumably ext.args feeds the pileup step and ext.args2 the call step of
// the multi-tool module - confirm against the module script.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_MPILEUP' {
ext.args = [
"-I",
"-E",
"-a 'FORMAT/DP'"
].join(' ')
ext.args2 = [
"-Aim",
"-C alleles"
].join(' ')
}

// GLIMPSE_PHASE inside VCF_IMPUTE_GLIMPSE: outputs are prefixed with the bare sample id
// and ext.suffix is set to "bcf".
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:GLIMPSE_PHASE' {
ext.args = [
"--impute-reference-only-variants"
].join(' ')
ext.prefix = { "${meta.id}" }
ext.suffix = "bcf"
}
// GLIMPSE_CHUNK inside VCF_IMPUTE_GLIMPSE: window and buffer sizes are passed as extra
// arguments; outputs are prefixed with the bare sample id.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:GLIMPSE_CHUNK' {
ext.args = [
"--window-size 200000",
"--buffer-size 20000"
].join(' ')
ext.prefix = { "${meta.id}" }
}
// GLIMPSE_LIGATE inside VCF_IMPUTE_GLIMPSE: prefix encodes sample id, depth and panel.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:GLIMPSE_LIGATE' {
ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}" }
}
// Unqualified selector: applies to GLIMPSE_CONCORDANCE by simple name.
// Prefix encodes sample id, depth, panel and region.
withName: GLIMPSE_CONCORDANCE {
ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}_R${meta.region}" }
}
// Unqualified selector for ADD_COLUMNS: same prefix as GLIMPSE_CONCORDANCE plus "_SNP".
withName: ADD_COLUMNS {
ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}_R${meta.region}_SNP" }
}
}
33 changes: 33 additions & 0 deletions conf/steps/imputation.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
----------------------------------------------------------------------------------------
*/

process {
    // Defaults shared by every process of the CONCAT_IMPUT subworkflow:
    // a common output prefix and a single publish directory. The saveAs
    // closure returns null for 'versions.yml' and the file name otherwise.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_IMPUT:.*' {
        ext.prefix = { "${meta.id}_impute_concat" }
        publishDir = [
            saveAs: { filename -> filename == 'versions.yml' ? null : filename },
            mode: params.publish_dir_mode,
            path: { "${params.outdir}/imputation/concat" }
        ]
    }

    // Extra arguments for the concatenation step, evaluated lazily as a closure.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_IMPUT:BCFTOOLS_CONCAT' {
        ext.args = {
            def concatArgs = ["--ligate", "--output-type z"]
            concatArgs.join(" ").trim()
        }
    }

    // Extra argument for the indexing step.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_IMPUT:BCFTOOLS_INDEX' {
        ext.args = "--tbi"
    }
}
84 changes: 84 additions & 0 deletions conf/steps/imputation_glimpse1.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
----------------------------------------------------------------------------------------
*/

process {
    // Configuration for the glimpse1 imputation subworkflow

    // GL_INPUT: a publish path is defined per tool (first '_'-separated token of
    // the simple process name, lower-cased) but publishing is disabled.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:.*' {
        publishDir = [
            path: { "${params.outdir}/imputation/glimpse1/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
            enabled: false
        ]
    }

    // Two argument sets for the multi-tool mpileup module. The region is embedded
    // in the prefix with ':' replaced by '_'.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_MPILEUP' {
        ext.args = [
            "-I",
            "-E",
            "-a 'FORMAT/DP'"
        ].join(' ')
        ext.args2 = [
            "-Aim",
            "-C alleles"
        ].join(' ')
        ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.call" }
    }

    // Variant IDs are set to CHROM:POS:REF:ALT.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_ANNOTATE' {
        ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz"
        ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.annotate" }
    }

    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_INDEX' {
        ext.args = "--tbi"
    }

    // VCF_IMPUTE_GLIMPSE1: default publish location for the subworkflow.
    // The selectors below override it to disable publishing of intermediates.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' {
        publishDir = [
            path: { "${params.outdir}/imputation/glimpse1/" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_CHUNK' {
        ext.args = [
            "--window-size 200000",
            "--buffer-size 20000"
        ].join(' ')
        ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.chunk" }
        publishDir = [ enabled: false ]
    }

    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' {
        ext.args = [
            "--impute-reference-only-variants"
        ].join(' ')
        ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.phase" }
        ext.suffix = "bcf"
        publishDir = [ enabled: false ]
    }

    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_PHASE' {
        publishDir = [ enabled: false ]
    }

    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' {
        ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.ligate" }
    }

    // Assigning publishDir here replaces the map set by the '.*' selector instead
    // of merging with it, so mode and saveAs are repeated. Without them the index
    // would ignore params.publish_dir_mode and versions.yml would be published
    // alongside the ligated results.
    withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_LIGATE' {
        publishDir = [
            path: { "${params.outdir}/imputation/glimpse1" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,25 @@ process {
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:GLIMPSE_CHUNK' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:.*' {

ext.prefix = { "${meta.id}_${meta.chr}" }

publishDir = [
[
path: { "${params.outdir}/quilt_impute/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}_chunk" },
path: { "${params.outdir}/imputation/quilt/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}_chunk" },
mode: params.publish_dir_mode,
enabled: false
],


]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:GLIMPSE_CHUNK' {
ext.prefix = { "${meta.id}_${meta.chr}" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:BCFTOOLS_INDEX' {
cpus = 2
memory = 400.MB
Expand Down Expand Up @@ -76,60 +81,34 @@ process {
cpus = 2
memory = 400.MB
maxRetries = 2
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:.*' {
publishDir = [
[
path: { "${params.outdir}/quilt_impute/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/convert" },
path: { "${params.outdir}/imputation/quilt/" },
mode: params.publish_dir_mode,
],
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:QUILT_QUILT' {
publishDir = [
[
path: { "${params.outdir}/quilt_impute/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
],
]
ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute" }
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:BCFTOOLS_INDEX' {
ext.args = {[
"--tbi",
].join(" ").trim()}
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:INDEX1' {
ext.args = "--tbi"
publishDir = [enabled: false]
}


withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCATENATE_BCFTOOLS:BCFTOOLS_CONCAT' {
ext.args = {[
"--ligate",
"--output-type z",
].join(" ").trim()}

cpus = 2
memory = 1.GB
maxRetries = 2

publishDir = [
[
path: { "${params.outdir}/quilt_impute/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/concat" },
mode: params.publish_dir_mode,
],
]
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:BCFTOOLS_ANNOTATE' {
ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz"
ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute.annotate" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCATENATE_BCFTOOLS:BCFTOOLS_INDEX' {
ext.args = {[
"--tbi",
].join(" ").trim()}

publishDir = [
[
path: { "${params.outdir}/quilt_impute/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/concat" },
mode: params.publish_dir_mode,
],
]
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:INDEX2' {
ext.args = "--tbi"
}

}
21 changes: 21 additions & 0 deletions conf/steps/initialisation.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
----------------------------------------------------------------------------------------
*/

process {
    // Publishing is switched off for every process under PIPELINE_INITIALISATION.
    // A per-tool path is still defined: the simple process name (text after the
    // last ':') is cut at its first '_' and lower-cased.
    withName: 'PIPELINE_INITIALISATION:.*' {
        publishDir = [
            enabled: false,
            mode: params.publish_dir_mode,
            path: {
                def simpleName = task.process.tokenize(':')[-1]
                def toolDir = simpleName.tokenize('_')[0].toLowerCase()
                "${params.outdir}/initialisation/${toolDir}"
            }
        ]
    }
}
Loading

0 comments on commit 411233e

Please sign in to comment.