Skip to content

Commit

Permalink
Merge pull request #163 from LouisLeNezet/awstest
Browse files Browse the repository at this point in the history
Improve config and reduce fulltest
  • Loading branch information
LouisLeNezet authored Dec 6, 2024
2 parents 05a2a4c + 4c846d0 commit 61c18f6
Show file tree
Hide file tree
Showing 26 changed files with 419 additions and 313 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ jobs:
- name: Check out pipeline code
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

- uses: actions/setup-java@8df1039502a15bceb9433410b1a100fbe190c53b # v4
with:
distribution: "temurin"
java-version: "17"

- name: Set up Nextflow
uses: nf-core/setup-nextflow@v2
with:
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Special thanks to [Matthias Hörtenhuber](https://github.com/mashehu), [Mazzalab
- [#160](https://github.com/nf-core/phaseimpute/pull/160) - Improve `CHANGELOG.md` and add details to `usage.md`
- [#158](https://github.com/nf-core/phaseimpute/pull/158) - Remove frequency computation and phasing from full test to reduce cost and computational time.
- [#164](https://github.com/nf-core/phaseimpute/pull/164) - Rename `BAM_REGION_SAMTOOLS` to `BAM_EXTRACT_REGION_SAMTOOLS`. Remove `GLIMPSE2_SPLITREFERENCE` as it is not used. Add more steps to the `test_all` profile for more exhaustive testing.
- [#163](https://github.com/nf-core/phaseimpute/pull/163) - Improve configuration for demanding processes. Use Genome in a Bottle VCF benchmarking file for AWS full test. Move from `glimpse1` to `glimpse2` for the full test profile.

### `Fixed`

Expand All @@ -81,6 +82,7 @@ Special thanks to [Matthias Hörtenhuber](https://github.com/mashehu), [Mazzalab
- [#161](https://github.com/nf-core/phaseimpute/pull/161) - Fix `VCF_SPLIT_BCFTOOLS` when only one sample is present by updating `BCFTOOLS_PLUGINSPLIT` and adding `BCFTOOLS_QUERY` to get truth sample names for renaming the resulting files.
- [#162](https://github.com/nf-core/phaseimpute/pull/162) - Fix `fai` usage when provided by `genomes` parameter.
- [#164](https://github.com/nf-core/phaseimpute/pull/164) - Improve documentation writing
- [#163](https://github.com/nf-core/phaseimpute/pull/163) - Fix MULTIQC sample names (add post-processing to clean up `FILTER_CHR_DWN`, `FILTER_CHR_INP`, `GAWK_ERROR_SPL`, `GAWK_RSQUARE_SPL`). Fix output panel `publishDir`. Set Java version to `17` in `ci.yml` due to the new Nextflow version.

### `Dependencies`

Expand Down
29 changes: 27 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,22 @@ process {
// Coverage process
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_INP' {
cache = "lenient"
ext.prefix = { "${meta.id}.truth" }
ext.prefix = { "${meta.id}.truth.allchr" }
publishDir = [ enabled: false ]
}
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_DWN' {
cache = "lenient"
ext.prefix = { "${meta.id}.allchr" }
publishDir = [ enabled: false ]
}

// Filter chromosomes in coverage
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:FILTER_CHR_INP' {
ext.prefix = { "${meta.id}.truth" }
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:FILTER_CHR_DWN' {
ext.prefix = { "${meta.id}" }
publishDir = [ enabled: false ]
}
Expand Down Expand Up @@ -86,8 +97,22 @@ process {
]
}

// Compute sample files for renaming
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY_IMPUTED' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.args = '--list-samples'
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK_IMPUTED' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.prefix = { "${meta.id}_samples"}
ext.args2 = { "-v tools=\"${meta.tools}\" " + "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".\"tools }'" }
publishDir = [enabled: false]
}

// Split by samples for each tool
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_IMPUTED:BCFTOOLS_PLUGINSPLIT' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.args = ["--output-type z", "--write-index=tbi"].join(' ')
publishDir = [
Expand Down
27 changes: 21 additions & 6 deletions conf/steps/panel_prep.config
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,13 @@ process {
publishDir = [
path: { "${params.outdir}/prep_panel/panel" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: { !params.compute_freq && !params.phase }
saveAs: { filename ->
if ( !params.compute_freq && !params.phase ) {
filename.equals('versions.yml') ? null : filename
} else {
null
}
}
]
}

Expand All @@ -66,8 +71,13 @@ process {
publishDir = [
path: { "${params.outdir}/prep_panel/panel" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: { !params.phase }
saveAs: { filename ->
if ( !params.phase ) {
filename.equals('versions.yml') ? null : filename
} else {
null
}
}
]
}

Expand All @@ -76,8 +86,13 @@ process {
publishDir = [
path: { "${params.outdir}/prep_panel/panel" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: { !params.phase }
saveAs: { filename ->
if ( !params.phase ) {
filename.equals('versions.yml') ? null : filename
} else {
null
}
}
]
}

Expand Down
4 changes: 4 additions & 0 deletions conf/steps/simulation.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ process {
tag = {"${meta.id} ${meta.chr}"}
}
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:SAMTOOLS_VIEW' {
label = 'process_medium'
ext.args = ["--output-fmt bam", "--write-index"].join(' ')
ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:SAMTOOLS_MERGE' {
cache = "lenient"
ext.prefix = { "${meta.id}" }
tag = {"${meta.id} ${meta.chr}"}
}
Expand All @@ -33,6 +35,7 @@ process {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:.*' {
tag = {"${meta.id} ${meta.chr}"}
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:SAMTOOLS_DEPTH' {
publishDir = [enabled: false]
ext.prefix = { "${meta1.id}_C${meta1.chr ?: "all"}.depth" }
Expand All @@ -48,6 +51,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:SAMTOOLS_VIEW' {
cache = "lenient"
ext.args = ["--output-fmt bam", "--write-index"].join(' ')
ext.prefix = { "${meta.id}.depth_${meta.depth}x" }
publishDir = [
Expand Down
22 changes: 20 additions & 2 deletions conf/steps/validation.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MPILEUP' {
label = 'process_high'
cache = "lenient"
ext.args = [
"-I",
"-E",
Expand Down Expand Up @@ -65,13 +67,13 @@ process {
}

// Compute sample files for renaming
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY_TRUTH' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.args = '--list-samples'
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK_TRUTH' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.prefix = { "${meta.id}_samples"}
ext.args2 = "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".truth\" }'"
Expand Down Expand Up @@ -103,6 +105,22 @@ process {
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK_ERROR_SPL' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance.renamed.error.spl" }
ext.suffix = "txt.gz"
ext.args2 = { "-v tool=\"${meta.tools}\" " + "'BEGIN { OFS = \" \" } !/^#/ { \$3 = \$3\".\"tool } { print }'" }
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK_RSQUARE_SPL' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance.renamed.rsquare.spl" }
ext.suffix = "txt.gz"
ext.args2 = { "-v tool=\"${meta.tools}\" " + "'BEGIN { OFS = \" \" } !/^#/ { \$1 = \$1\".\"tool } { print }'" }
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GUNZIP' {
ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}" }
publishDir = [ enabled: false ]
Expand Down
19 changes: 16 additions & 3 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ params {
genome = "GRCh38"

// Input data
input = "${projectDir}/tests/csv/sample_sim_full.csv"
panel = "${projectDir}/tests/csv/panel_full.csv"
input = "${projectDir}/tests/csv/sample_sim_full.csv"
input_truth = "${projectDir}/tests/csv/sample_sim_full_truth.csv"
panel = "${projectDir}/tests/csv/panel_full.csv"

// Pipeline steps
steps = "all"
Expand All @@ -40,5 +41,17 @@ params {
phase = false

// Impute tools
tools = "glimpse1"
tools = "glimpse2"

// Concordance arguments
min_val_gl = null
min_val_dp = null
}

process {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' {
ext.args = "--gt-val --af-tag AF"
ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance" }
publishDir = [ enabled: false ]
}
}
7 changes: 0 additions & 7 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,6 @@ workflow NFCORE_PHASEIMPUTE {
ch_input_validate = ch_input
}

if (params.steps.split(',').contains("all")) {
ch_input_truth.map{
error "Cannot run all steps with --input-truth"
}
ch_input_truth = ch_input
}

//
// WORKFLOW: Run pipeline
//
Expand Down
7 changes: 3 additions & 4 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,8 @@
},
"gawk": {
"branch": "master",
"git_sha": "97321eded31a12598837a476d3615300af413bb7",
"installed_by": ["modules"],
"patch": "modules/nf-core/gawk/gawk.diff"
"git_sha": "caab1314ca62679b629da4c79afa9a4cab2bb8ee",
"installed_by": ["modules"]
},
"glimpse/chunk": {
"branch": "master",
Expand All @@ -99,7 +98,7 @@
},
"glimpse2/concordance": {
"branch": "master",
"git_sha": "cc64e71652f67ce627064af51008fe0a00850987",
"git_sha": "6aed50284f6b208fd8eff1ec1dae4b25bf03c432",
"installed_by": ["modules"]
},
"glimpse2/ligate": {
Expand Down
6 changes: 6 additions & 0 deletions modules/nf-core/gawk/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 61c18f6

Please sign in to comment.