From 2ba8c89634e2487ac61c9ec1de4d71125ca2b716 Mon Sep 17 00:00:00 2001 From: LouisBzh Date: Fri, 8 Nov 2024 12:22:55 +0100 Subject: [PATCH 1/6] Set params.chunk_model --- conf/test_all.config | 1 + conf/test_dog.config | 1 + conf/test_panelprep.config | 1 + nextflow.config | 3 ++- nextflow_schema.json | 7 +++++++ subworkflows/local/vcf_chunk_glimpse/main.nf | 4 +--- .../local/vcf_chunk_glimpse/tests/main.nf.test | 2 ++ subworkflows/local/vcf_phase_shapeit5/main.nf | 10 ++++------ .../local/vcf_phase_shapeit5/tests/main.nf.test | 2 ++ workflows/phaseimpute/main.nf | 5 +++-- 10 files changed, 24 insertions(+), 12 deletions(-) diff --git a/conf/test_all.config b/conf/test_all.config index 854f8664..c16d7904 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -33,6 +33,7 @@ params { phase = true normalize = true compute_freq = false + chunk_model = "recursive" // Pipeline steps steps = "all" diff --git a/conf/test_dog.config b/conf/test_dog.config index 33755994..6ebc015e 100644 --- a/conf/test_dog.config +++ b/conf/test_dog.config @@ -32,6 +32,7 @@ params { normalize = false compute_freq = false rename_chr = true + chunk_model = "recursive" // Input data input = params.pipelines_testdata_base_path + "dog_data/csv/sample_dog.csv" diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config index 181d81de..f62f2189 100644 --- a/conf/test_panelprep.config +++ b/conf/test_panelprep.config @@ -32,6 +32,7 @@ params { normalize = true compute_freq = true remove_samples = "HG00096,HG00097,HG00099,HG00100" + chunk_model = "recursive" // Pipeline steps steps = "panelprep" diff --git a/nextflow.config b/nextflow.config index 14781b4a..3e402c96 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ params { // steps - steps = null + steps = null // Input options input = null @@ -24,6 +24,7 @@ params { normalize = true compute_freq = false remove_samples = null + chunk_model = 'sequential' // ChrCheck parameters rename_chr = false diff --git 
a/nextflow_schema.json b/nextflow_schema.json index e5d2f315..34008bd7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -132,6 +132,13 @@ "binaryref": { "type": "string", "description": "Whether to generate a binary reference file to be used with GLIMPSE2" + }, + "chunk_model" : { + "type": "string", + "description": "Model type to use for GLIMPSE2_CHUNK", + "enum": ["recursive", "sequential", "uniform-number-variants"], + "default": "sequential", + "hidden": true } } }, diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf index 457b0b1b..dad0dcf0 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -7,6 +7,7 @@ workflow VCF_CHUNK_GLIMPSE { take: ch_reference // channel: [ [panel, chr], vcf, csi ] ch_map // channel (optional): [ [chr], map ] + chunk_model // channel : model main: @@ -36,9 +37,6 @@ workflow VCF_CHUNK_GLIMPSE { ) .map { metaPC, it -> [metaPC, it["RegionIn"], it["RegionOut"]]} - // Make chunks with Glimpse2 (does not work with "sequential" mode) - chunk_model = "recursive" - ch_input_glimpse2 = ch_vcf_csi_chr .map{ metaPC, vcf, csi, chr -> [metaPC.subMap("chr"), metaPC, vcf, csi, chr] diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test index c222059e..f25d7a05 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test @@ -47,6 +47,7 @@ nextflow_workflow { file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true) ] ) + input[2] = "recursive" """ } } @@ -87,6 +88,7 @@ nextflow_workflow { [[chr: "chr22"], []], [[chr: "chr21"], []] ) + input[2] = "recursive" """ } } diff --git a/subworkflows/local/vcf_phase_shapeit5/main.nf b/subworkflows/local/vcf_phase_shapeit5/main.nf index af643b14..50b0c017 100644 --- 
a/subworkflows/local/vcf_phase_shapeit5/main.nf +++ b/subworkflows/local/vcf_phase_shapeit5/main.nf @@ -7,19 +7,17 @@ include { BCFTOOLS_INDEX as VCF_BCFTOOLS_INDEX_2 } from '../../../modules/nf-cor workflow VCF_PHASE_SHAPEIT5 { take: - ch_vcf // channel (mandatory): [ [id, chr], vcf, csi, pedigree ] + ch_vcf // channel (mandatory) : [ [id, chr], vcf, csi, pedigree ] ch_region // channel (mandatory) : [ [chr, region], region ] - ch_ref // channel (optional) : [ [id, chr], ref, csi ] - ch_scaffold // channel (optional) : [ [id, chr], scaffold, csi ] + ch_ref // channel (optional) : [ [id, chr], ref, csi ] + ch_scaffold // channel (optional) : [ [id, chr], scaffold, csi ] ch_map // channel (mandatory) : [ [chr], map] + chunk_model // channel (mandatory) : [ model ] main: ch_versions = Channel.empty() - // Make chunks with Glimpse2 (does not work with "sequential" mode) - chunk_model = "recursive" - // Chunk with Glimpse2 ch_input_glimpse2 = ch_vcf .map{ diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test index 4a95348f..8e38306e 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test @@ -52,6 +52,7 @@ nextflow_workflow { [[chr: "chr22"],[]], [[chr: "chr21"], []] ) + input[5] = "recursive" """ } } @@ -103,6 +104,7 @@ nextflow_workflow { [ [chr: "chr22"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_22.map", checkIfExist:true)], [ [chr: "chr21"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true)] ) + input[5] = "recursive" """ } } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 5ae079dc..43941ddf 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -171,14 +171,15 @@ workflow PHASEIMPUTE { ch_region, [[],[],[]], [[],[],[]], - ch_map + ch_map, + params.chunk_model ) 
ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } // Create chunks from reference VCF - VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) + VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, params.chunk_model) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) // Assign chunks channels From 0722f8ba5cd1247c88db6c270134a354f286d9ae Mon Sep 17 00:00:00 2001 From: LouisBzh Date: Fri, 8 Nov 2024 12:25:39 +0100 Subject: [PATCH 2/6] Fix linting --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 34008bd7..8aa167c2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -133,7 +133,7 @@ "type": "string", "description": "Whether to generate a binary reference file to be used with GLIMPSE2" }, - "chunk_model" : { + "chunk_model": { "type": "string", "description": "Model type to use for GLIMPSE2_CHUNK", "enum": ["recursive", "sequential", "uniform-number-variants"], From 7d242be192782505a94b02b43ce3e96d547d12cb Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 8 Nov 2024 14:50:54 +0100 Subject: [PATCH 3/6] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 940f0cfa..a5f95677 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#103](https://github.com/nf-core/phaseimpute/pull/103) - Update Glimpse2 phase, gunzip and multiqc - [#135](https://github.com/nf-core/phaseimpute/pull/135) - Impute by batch of 100 individuals by default using `--batch_size` parameter. All individuals BAM files are gathered and VCF are allowed for glimpse1 and glimpse2. Channel preprocessing of stitch is done in stitch subworkflow. Genotype likelihood computation for glimpse1 is now done outside of the subworkflow and merge the resulting vcf with all the samples. 
New test added to check batch separation. Improve `usage.md` documentation. Add validation to initialisation of the pipeline to ensure compatibility between tools, steps and the files provided by the user. - [#139](https://github.com/nf-core/phaseimpute/pull/139) - Update all nf-core modules +- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as parameter for better control over `GLIMPSE2_CHUNK`. ### `Fixed` From 80c9d35ed3edf69b0d4c31ec1bfd6be69b77c983 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 8 Nov 2024 15:03:32 +0100 Subject: [PATCH 4/6] Set chunk window size to 4mb --- CHANGELOG.md | 2 +- conf/steps/panel_prep.config | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b670f63a..bbc71090 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,7 +64,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#148](https://github.com/nf-core/phaseimpute/pull/148) - Fix awsfulltest github action for manual dispatch - [#149](https://github.com/nf-core/phaseimpute/pull/149) - Remove the map file from the awsfulltest - [#152](https://github.com/nf-core/phaseimpute/pull/152) - Fix URLs in the documentation and remove tools citation in the README, use a white background for all images in the documentation. -- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as parameter for better control over `GLIMPSE2_CHUNK`. +- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as parameter for better control over `GLIMPSE2_CHUNK` and set window size in `GLIMPSE1_CHUNK` and `GLIMPSE2_CHUNK` to 4 Mb to reduce the number of chunks (value chosen empirically).
### `Fixed` diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index d578817a..3ab60861 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -41,8 +41,8 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' { - ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } + ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } publishDir = [ enabled: false ] } @@ -62,7 +62,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { - ext.prefix = { "${meta.id}_${meta.chr}_fixup" } + ext.prefix = { "${meta.id}_${meta.chr}_fixup" } publishDir = [ path: { "${params.outdir}/prep_panel/panel" }, mode: params.publish_dir_mode, @@ -72,7 +72,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" publishDir = [ path: { "${params.outdir}/prep_panel/panel" }, mode: params.publish_dir_mode, @@ -88,6 +88,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = "--window-mb 4" ext.prefix = { "${meta.id}_chunks" } } @@ -123,7 +124,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' { - ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} publishDir = [ path: { "${params.outdir}/prep_panel/haplegend/" }, mode: params.publish_dir_mode, @@ -184,6 +185,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = "--window-size 4" ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse1" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, @@ -194,7 +196,8 @@ process { } 
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } + ext.args = "--window-mb 4" + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, From 857772d4b541c72f0cff65559c99249f6a3fef90 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 10 Nov 2024 18:31:45 +0100 Subject: [PATCH 5/6] Change chunk model initialisation --- main.nf | 3 +++ .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 7 +++++++ .../utils_nfcore_phaseimpute_pipeline/tests/main.nf.test | 3 +++ workflows/phaseimpute/main.nf | 5 +++-- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index a5e3fa05..f5303872 100644 --- a/main.nf +++ b/main.nf @@ -43,6 +43,7 @@ workflow NFCORE_PHASEIMPUTE { ch_map // channel: map file for imputation ch_posfile // channel: samplesheet read in from --posfile ch_chunks // channel: samplesheet read in from --chunks + chunk_model // parameter: chunk model ch_versions // channel: versions of software used main: @@ -101,6 +102,7 @@ workflow NFCORE_PHASEIMPUTE { ch_map, ch_posfile, ch_chunks, + chunk_model, ch_versions ) emit: @@ -141,6 +143,7 @@ workflow { PIPELINE_INITIALISATION.out.gmap, PIPELINE_INITIALISATION.out.posfile, PIPELINE_INITIALISATION.out.chunks, + PIPELINE_INITIALISATION.out.chunk_model, PIPELINE_INITIALISATION.out.versions ) // diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 54fc07d1..21eab92c 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -279,6 +279,9 @@ workflow PIPELINE_INITIALISATION { // Check that all input files have the correct index checkFileIndex(ch_input.mix(ch_input_truth, ch_ref_gen, ch_panel)) + // Chunk model + chunk_model = 
params.chunk_model + emit: input = ch_input // [ [meta], file, index ] input_truth = ch_input_truth // [ [meta], file, index ] @@ -289,6 +292,7 @@ workflow PIPELINE_INITIALISATION { gmap = ch_map // [ [map], map ] posfile = ch_posfile // [ [panel, chr], vcf, index, hap, legend ] chunks = ch_chunks // [ [chr], txt ] + chunk_model = chunk_model versions = ch_versions } @@ -407,6 +411,9 @@ def validateInputParameters() { error("To use `--remove_samples` you need to include `--normalize`.") } } + + // Check that the chunk model is provided + assert params.chunk_model : "No chunk model provided" } // diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test index 238bc53c..2c7045c9 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test @@ -17,6 +17,7 @@ nextflow_workflow { chunks = "../../../tests/csv/chunks.csv" posfile = "../../../tests/csv/posfile.csv" panel = "../../../tests/csv/panel.csv" + chunk_model = "recursive" } workflow { """ @@ -49,6 +50,7 @@ nextflow_workflow { posfile = "../../../tests/csv/posfile.csv" panel = "../../../tests/csv/panel.csv" input_region = "$moduleTestDir/region.csv" + chunk_model = "sequential" } workflow { """ @@ -80,6 +82,7 @@ nextflow_workflow { chunks = "../../../tests/csv/chunks.csv" panel = "../../../tests/csv/panel.csv" input_region = "$moduleTestDir/region.csv" + chunk_model = "recursive" } workflow { """ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 43941ddf..fe4c1fa9 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -91,6 +91,7 @@ workflow PHASEIMPUTE { ch_map // channel: genetic map [ [chr], map] ch_posfile // channel: posfile [ [id, chr], vcf, index, hap, legend] ch_chunks // channel: chunks [ [chr], txt] + chunk_model // parameter: chunk model ch_versions 
// channel: versions of software used main: @@ -172,14 +173,14 @@ workflow PHASEIMPUTE { [[],[],[]], [[],[],[]], ch_map, - params.chunk_model + chunk_model ) ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } // Create chunks from reference VCF - VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, params.chunk_model) + VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, chunk_model) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) // Assign chunks channels From e34c5a2aff4e1872e08aad52674bf13a1144ee82 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 10 Nov 2024 19:19:31 +0100 Subject: [PATCH 6/6] Add nf-test for chunk model --- nextflow_schema.json | 2 +- .../vcf_chunk_glimpse/tests/main.nf.test | 63 +++- .../vcf_chunk_glimpse/tests/main.nf.test.snap | 333 ++++++++++++++++-- .../vcf_chunk_glimpse/tests/nextflow.config | 5 + subworkflows/local/vcf_phase_shapeit5/main.nf | 3 - .../vcf_phase_shapeit5/tests/main.nf.test | 65 +++- .../tests/main.nf.test.snap | 90 ++++- .../vcf_phase_shapeit5/tests/nextflow.config | 2 +- 8 files changed, 497 insertions(+), 66 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 96b6f8e1..3beb8382 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -136,7 +136,7 @@ "chunk_model": { "type": "string", "description": "Model type to use for GLIMPSE2_CHUNK", - "enum": ["recursive", "sequential", "uniform-number-variants"], + "enum": ["recursive", "sequential"], "default": "sequential", "hidden": true } diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test index f25d7a05..9be51fbf 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test @@ -55,10 +55,12 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert 
snapshot(workflow.out.chunks.collect{ - path(it[1]).readLines() - }).match("chunksWithMap") + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() } ) } @@ -96,10 +98,55 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert snapshot(workflow.out.chunks.collect{ - path(it[1]).readLines() - }).match("chunksWithoutMap") + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() + } + ) + } + } + + test("Chunks with sequential model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of( + [[chr: "chr22"], []], + [[chr: "chr21"], []] + ) + input[2] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() } ) } diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap index c6a21882..90a62932 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap @@ -1,23 +1,4 @@ { - "chunksWithoutMap": { - "content": [ - [ - [ - 
"0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", - "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" - ], - [ - "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", - "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-07-18T11:26:35.422657952" - }, "Chunks without Map": { "content": [ { @@ -289,13 +270,23 @@ "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", "versions.yml:md5,7d277747b107043dd31d3aef18045eef" ] - } + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:34.820936365" + "timestamp": "2024-11-10T18:36:03.086912114" }, "Chunks with Map": { "content": [ @@ -536,16 +527,296 @@ "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", "versions.yml:md5,7d277747b107043dd31d3aef18045eef" ] - } + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:24.155279577" + "timestamp": "2024-11-10T18:35:46.374947355" }, - "chunksWithMap": { + "Chunks with sequential model": { "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + 
"1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "3": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "4": [ + [ + [ + + ] + ] + ], + "5": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ], + "binary": [ + [ + [ + + ] + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "chunks_glimpse1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + 
"chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "chunks_glimpse2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "chunks_quilt": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "versions": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ] + }, [ [ "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", @@ -558,9 +829,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:24.609554836" + "timestamp": "2024-11-10T18:36:16.974727308" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config index 3f3e8b9e..5a412491 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config +++ 
b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config @@ -1,4 +1,9 @@ process { + resourceLimits = [ + memory : "2.GB", + cpus : 2, + time : "1h" + ] withName: GLIMPSE2_CHUNK { ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') ext.prefix = { "${meta.id}_${meta.chr}" } diff --git a/subworkflows/local/vcf_phase_shapeit5/main.nf b/subworkflows/local/vcf_phase_shapeit5/main.nf index 50b0c017..7bf0e7d2 100644 --- a/subworkflows/local/vcf_phase_shapeit5/main.nf +++ b/subworkflows/local/vcf_phase_shapeit5/main.nf @@ -41,9 +41,6 @@ workflow VCF_PHASE_SHAPEIT5 { ) .map { metaIC, it -> [metaIC, it["RegionBuf"], it["RegionCnk"]]} - ch_chunks_number = GLIMPSE2_CHUNK.out.chunk_chr - .map { meta, chunk -> [meta.subMap("chr"), chunk.countLines().intValue()]} - ch_phase_input = ch_vcf .combine(ch_chunks_glimpse2, by:0) .map{ diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test index 8e38306e..a2f855ac 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test @@ -20,7 +20,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/index" - test("Phase vcf with regions, no map, no ref, no scaffold") { + test("Phase vcf with regions, no map, no ref, no scaffold, recursive model") { when { params { max_cpus = 2 @@ -60,10 +60,64 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert snapshot(workflow.out.vcf_tbi.collect{ - path(it[1]).vcf.summary - }).match("Phasing content") + { assert snapshot( + workflow.out, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }).match() + }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.phased + }} + ) + } + } + + test("Phase vcf with regions, no map, no ref, no scaffold, sequential model") { + when { + params { + 
max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ] + ) + input[1] = Channel.of( + [[chr: "chr22", region:"chr22:16570000-16610000"], "chr22:16570000-16610000"], + [[chr: "chr21", region:"chr21:16570000-16610000"], "chr21:16570000-16610000"] + ) + input[2] = Channel.of([[],[],[]]).collect() + input[3] = Channel.of([[],[],[]]).collect() + input[4] = Channel.of( + [[chr: "chr22"],[]], + [[chr: "chr21"], []] + ) + input[5] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }).match() }, { workflow.out.vcf_tbi.collect{ assert path(it[1]).vcf.phased @@ -71,6 +125,7 @@ nextflow_workflow { ) } } + /* TODO: Fix this test with https://github.com/odelaneau/shapeit5/issues/96 test("Phase vcf with regions, with map, no ref, no scaffold") { when { diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap index ffa1c169..bb7e9f9d 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap @@ -1,18 +1,70 @@ { - "Phasing content": { + "Phase vcf with regions, no map, no ref, no scaffold, sequential model": { "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + 
"1000GP.vcf.gz:md5,4029303e3c083ebb2522fb5c8dc4b63a", + "1000GP.vcf.gz.csi:md5,c57057d136f6c859aac4e6ae28ec793b" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz:md5,23d09ba884eda7449702fece3f652d9d", + "1000GP.vcf.gz.csi:md5,281791c87517a6f3e83c3fd736ec704e" + ] + ], + "1": [ + "versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ], + "vcf_tbi": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP.vcf.gz:md5,4029303e3c083ebb2522fb5c8dc4b63a", + "1000GP.vcf.gz.csi:md5,c57057d136f6c859aac4e6ae28ec793b" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz:md5,23d09ba884eda7449702fece3f652d9d", + "1000GP.vcf.gz.csi:md5,281791c87517a6f3e83c3fd736ec704e" + ] + ], + "versions": [ + "versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ] + }, [ "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T15:56:20.067633938" + "timestamp": "2024-11-10T19:18:52.508986134" }, - "Phase vcf with regions, no map, no ref, no scaffold": { + "Phase vcf with regions, no map, no ref, no scaffold, recursive model": { "content": [ { "0": [ @@ -21,16 +73,16 @@ "id": "1000GP", "chr": "chr21" }, - "1000GP.vcf.gz:md5,a8af1c991fb1ca9e0919e0d5ddae8968", - "1000GP.vcf.gz.csi:md5,b4c653bd398bb838b496e022705bb562" + 
"1000GP.vcf.gz:md5,d454c821a71b7c569540e381068fbe03", + "1000GP.vcf.gz.csi:md5,13e80ee9a038a715efd6f97befec28c6" ], [ { "id": "1000GP", "chr": "chr22" }, - "1000GP.vcf.gz:md5,106157dc553c3c19eebdce0567e69d4f", - "1000GP.vcf.gz.csi:md5,58026cf302bd82382a078c5690429b5c" + "1000GP.vcf.gz:md5,68e488b81ea8ca52a52b20fec603bf3e", + "1000GP.vcf.gz.csi:md5,98037a25e4112f8a4df62eba96d08634" ] ], "1": [ @@ -46,16 +98,16 @@ "id": "1000GP", "chr": "chr21" }, - "1000GP.vcf.gz:md5,a8af1c991fb1ca9e0919e0d5ddae8968", - "1000GP.vcf.gz.csi:md5,b4c653bd398bb838b496e022705bb562" + "1000GP.vcf.gz:md5,d454c821a71b7c569540e381068fbe03", + "1000GP.vcf.gz.csi:md5,13e80ee9a038a715efd6f97befec28c6" ], [ { "id": "1000GP", "chr": "chr22" }, - "1000GP.vcf.gz:md5,106157dc553c3c19eebdce0567e69d4f", - "1000GP.vcf.gz.csi:md5,58026cf302bd82382a078c5690429b5c" + "1000GP.vcf.gz:md5,68e488b81ea8ca52a52b20fec603bf3e", + "1000GP.vcf.gz.csi:md5,98037a25e4112f8a4df62eba96d08634" ] ], "versions": [ @@ -65,12 +117,16 @@ "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" ] - } + }, + [ + "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-22T15:46:34.727932091" + "timestamp": "2024-11-10T19:18:20.407509527" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config index 8817d417..c0c00d6f 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config +++ b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config @@ -1,7 +1,7 @@ process { withName: GLIMPSE2_CHUNK { ext.prefix = { "${meta.id}_chunks" } - ext.args = ["--window-mb 0.1", "--window-cm 
0.1", "--window-count 2000", "--buffer-mb 0.05", "--buffer-cm 0.05", "--buffer-count 300"].join(' ') + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') } withName: SHAPEIT5_PHASECOMMON {