Merge pull request #157 from LouisLeNezet/chunk_model
Add chunk_model as parameter
LouisLeNezet authored Nov 10, 2024
2 parents fe2213c + e34c5a2 commit 06d6b60
Showing 19 changed files with 544 additions and 83 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -64,6 +64,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co
- [#148](https://github.com/nf-core/phaseimpute/pull/148) - Fix awsfulltest github action for manual dispatch
- [#149](https://github.com/nf-core/phaseimpute/pull/149) - Remove the map file from the awsfulltest
- [#152](https://github.com/nf-core/phaseimpute/pull/152) - Fix URLs in the documentation, remove the tools citation in the README, and use a white background for all images in the documentation.
- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as a parameter for better control over `GLIMPSE2_CHUNK`, and set the window size in `GLIMPSE1_CHUNK` and `GLIMPSE2_CHUNK` to 4 Mb to reduce the number of chunks (empirically determined).

### `Fixed`

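The new parameter defaults to `sequential` (see the `nextflow.config` change below) and is validated against the pipeline schema. A minimal sketch of overriding it at launch, assuming a custom config file passed with `-c` (file name hypothetical):

    // custom.config -- pass with `nextflow run ... -c custom.config`
    params {
        chunk_model = 'recursive'   // allowed values per the schema: 'recursive' or 'sequential'
    }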
15 changes: 9 additions & 6 deletions conf/steps/panel_prep.config
@@ -41,8 +41,8 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' {
ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ')
ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" }
ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ')
ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" }
publishDir = [ enabled: false ]
}

@@ -62,7 +62,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' {
ext.prefix = { "${meta.id}_${meta.chr}_fixup" }
ext.prefix = { "${meta.id}_${meta.chr}_fixup" }
publishDir = [
path: { "${params.outdir}/prep_panel/panel" },
mode: params.publish_dir_mode,
@@ -72,7 +72,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' {
ext.args = "--tbi"
ext.args = "--tbi"
publishDir = [
path: { "${params.outdir}/prep_panel/panel" },
mode: params.publish_dir_mode,
@@ -88,6 +88,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' {
ext.args = "--window-mb 4"
ext.prefix = { "${meta.id}_chunks" }
}

@@ -123,7 +124,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' {
ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"}
ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"}
publishDir = [
path: { "${params.outdir}/prep_panel/haplegend/" },
mode: params.publish_dir_mode,
@@ -184,6 +185,7 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' {
ext.args = "--window-size 4"
ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse1" }
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/glimpse1/" },
@@ -194,7 +196,8 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' {
ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" }
ext.args = "--window-mb 4"
ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" }
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/glimpse2/" },
mode: params.publish_dir_mode,
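Because the chunk window sizes above are wired through `ext.args`, a user config can override them without modifying the pipeline. A hedged sketch (process selector copied from the diff above; the 2 Mb value is purely illustrative):

    process {
        withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' {
            ext.args = "--window-mb 2"   // PR default is 4; larger windows mean fewer chunks
        }
    }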
1 change: 1 addition & 0 deletions conf/test_all.config
@@ -33,6 +33,7 @@ params {
phase = true
normalize = true
compute_freq = false
chunk_model = "recursive"

// Pipeline steps
steps = "all"
1 change: 1 addition & 0 deletions conf/test_dog.config
@@ -32,6 +32,7 @@ params {
normalize = false
compute_freq = false
rename_chr = true
chunk_model = "recursive"

// Input data
input = params.pipelines_testdata_base_path + "dog_data/csv/sample_dog.csv"
1 change: 1 addition & 0 deletions conf/test_panelprep.config
@@ -32,6 +32,7 @@ params {
normalize = true
compute_freq = true
remove_samples = "HG00096,HG00097,HG00099,HG00100"
chunk_model = "recursive"

// Pipeline steps
steps = "panelprep"
3 changes: 3 additions & 0 deletions main.nf
@@ -43,6 +43,7 @@ workflow NFCORE_PHASEIMPUTE {
ch_map // channel: map file for imputation
ch_posfile // channel: samplesheet read in from --posfile
ch_chunks // channel: samplesheet read in from --chunks
chunk_model // parameter: chunk model
ch_versions // channel: versions of software used

main:
@@ -101,6 +102,7 @@ workflow NFCORE_PHASEIMPUTE {
ch_map,
ch_posfile,
ch_chunks,
chunk_model,
ch_versions
)
emit:
@@ -141,6 +143,7 @@ workflow {
PIPELINE_INITIALISATION.out.gmap,
PIPELINE_INITIALISATION.out.posfile,
PIPELINE_INITIALISATION.out.chunks,
PIPELINE_INITIALISATION.out.chunk_model,
PIPELINE_INITIALISATION.out.versions
)
//
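The change above threads `chunk_model` through the workflow boundary as a plain value rather than a channel, which Nextflow DSL2 `take:` blocks support. A minimal standalone sketch of that mechanism (all names hypothetical):

    workflow DEMO {
        take:
        ch_files   // channel: input files
        model      // value: a plain string, the same for every item

        main:
        ch_files.view { f -> "chunking ${f} with the ${model} model" }

        emit:
        files = ch_files
    }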
3 changes: 2 additions & 1 deletion nextflow.config
@@ -10,7 +10,7 @@
params {

// steps
steps = null
steps = null

// Input options
input = null
@@ -24,6 +24,7 @@ params {
normalize = true
compute_freq = false
remove_samples = null
chunk_model = 'sequential'

// ChrCheck parameters
rename_chr = false
7 changes: 7 additions & 0 deletions nextflow_schema.json
@@ -132,6 +132,13 @@
"binaryref": {
"type": "string",
"description": "Whether to generate a binary reference file to be used with GLIMPSE2"
},
"chunk_model": {
"type": "string",
"description": "Model type to use for GLIMPSE2_CHUNK",
"enum": ["recursive", "sequential"],
"default": "sequential",
"hidden": true
}
}
},
7 changes: 7 additions & 0 deletions subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf
@@ -279,6 +279,9 @@ workflow PIPELINE_INITIALISATION {
// Check that all input files have the correct index
checkFileIndex(ch_input.mix(ch_input_truth, ch_ref_gen, ch_panel))

// Chunk model
chunk_model = params.chunk_model

emit:
input = ch_input // [ [meta], file, index ]
input_truth = ch_input_truth // [ [meta], file, index ]
@@ -289,6 +292,7 @@ workflow PIPELINE_INITIALISATION {
gmap = ch_map // [ [map], map ]
posfile = ch_posfile // [ [panel, chr], vcf, index, hap, legend ]
chunks = ch_chunks // [ [chr], txt ]
chunk_model = chunk_model
versions = ch_versions
}

@@ -407,6 +411,9 @@ def validateInputParameters() {
error("To use `--remove_samples` you need to include `--normalize`.")
}
}

// Check that the chunk model is provided
assert params.chunk_model : "No chunk model provided"
}

//
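The bare `assert` above only guards against a missing value; the enum in `nextflow_schema.json` already rejects unknown values at launch, but an equivalent in-code check could look like the following sketch (helper name hypothetical; `error()` is the same function used earlier in this file):

    def validateChunkModel(model) {
        def allowed = ['recursive', 'sequential']   // mirrors the schema enum
        if (!(model in allowed)) {
            error("Invalid --chunk_model '${model}'. Valid options: ${allowed.join(', ')}")
        }
    }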
@@ -17,6 +17,7 @@ nextflow_workflow {
chunks = "../../../tests/csv/chunks.csv"
posfile = "../../../tests/csv/posfile.csv"
panel = "../../../tests/csv/panel.csv"
chunk_model = "recursive"
}
workflow {
"""
@@ -49,6 +50,7 @@
posfile = "../../../tests/csv/posfile.csv"
panel = "../../../tests/csv/panel.csv"
input_region = "$moduleTestDir/region.csv"
chunk_model = "sequential"
}
workflow {
"""
@@ -80,6 +82,7 @@
chunks = "../../../tests/csv/chunks.csv"
panel = "../../../tests/csv/panel.csv"
input_region = "$moduleTestDir/region.csv"
chunk_model = "recursive"
}
workflow {
"""
4 changes: 1 addition & 3 deletions subworkflows/local/vcf_chunk_glimpse/main.nf
@@ -7,6 +7,7 @@ workflow VCF_CHUNK_GLIMPSE {
take:
ch_reference // channel: [ [panel, chr], vcf, csi ]
ch_map // channel (optional): [ [chr], map ]
chunk_model // parameter: chunk model

main:

@@ -36,9 +37,6 @@
)
.map { metaPC, it -> [metaPC, it["RegionIn"], it["RegionOut"]]}

// Make chunks with Glimpse2 (does not work with "sequential" mode)
chunk_model = "recursive"

ch_input_glimpse2 = ch_vcf_csi_chr
.map{
metaPC, vcf, csi, chr -> [metaPC.subMap("chr"), metaPC, vcf, csi, chr]
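With the hard-coded `chunk_model = "recursive"` removed, the model now arrives through the `take:` block and the caller decides it. Presumably it is forwarded to the chunking process as a plain value input, along these lines (a sketch, assuming the nf-core `glimpse2/chunk` module declares a matching `val` input):

    // inside the main: block of VCF_CHUNK_GLIMPSE
    GLIMPSE2_CHUNK ( ch_input_glimpse2, chunk_model )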
65 changes: 57 additions & 8 deletions subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test
@@ -47,17 +47,20 @@ nextflow_workflow {
file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExists: true)
]
)
input[2] = "recursive"
"""
}
}

then {
assertAll(
{ assert workflow.success },
{ assert snapshot(workflow.out).match() },
{ assert snapshot(workflow.out.chunks.collect{
path(it[1]).readLines()
}).match("chunksWithMap")
{ assert snapshot(
workflow.out,
workflow.out.chunks.collect{
path(it[1]).readLines()
}
).match()
}
)
}
@@ -87,17 +90,63 @@
[[chr: "chr22"], []],
[[chr: "chr21"], []]
)
input[2] = "recursive"
"""
}
}

then {
assertAll(
{ assert workflow.success },
{ assert snapshot(workflow.out).match() },
{ assert snapshot(workflow.out.chunks.collect{
path(it[1]).readLines()
}).match("chunksWithoutMap")
{ assert snapshot(
workflow.out,
workflow.out.chunks.collect{
path(it[1]).readLines()
}
).match()
}
)
}
}

test("Chunks with sequential model") {
when {
params {
max_cpus = 2
max_memory = '2.GB'
}
workflow {
"""
input[0] = Channel.of(
[
[id: "1000GP", chr: "chr22"],
                    file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExists: true),
                    file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExists: true),
],
[
[id: "1000GP", chr: "chr21"],
                    file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExists: true),
                    file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExists: true),
]
)
input[1] = Channel.of(
[[chr: "chr22"], []],
[[chr: "chr21"], []]
)
input[2] = "sequential"
"""
}
}

then {
assertAll(
{ assert workflow.success },
{ assert snapshot(
workflow.out,
workflow.out.chunks.collect{
path(it[1]).readLines()
}
).match()
}
)
}
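In nf-test, the positional `input[n]` assignments map one-to-one onto the subworkflow's `take:` block, which is why `input[2]` carries the model string in the tests above. A minimal sketch of that wiring (channel definitions elided):

    workflow {
        """
        input[0] = ch_reference   // take: ch_reference, [ [panel, chr], vcf, csi ]
        input[1] = ch_map         // take: ch_map, [ [chr], map ]
        input[2] = "sequential"   // take: chunk_model, a plain string
        """
    }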