diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cea3088..3a1b8d40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ Special thanks to [Matthias Hörtenhuber](https://github.com/mashehu) and [Mazza - [#117](https://github.com/nf-core/phaseimpute/pull/117) - Fix directories in CSV. - [#151](https://github.com/nf-core/phaseimpute/pull/151) - Fix `Type not supported: class org.codehaus.groovy.runtime.GStringImpl` error due to `String` test in `getFileExtension()`. - [#153](https://github.com/nf-core/phaseimpute/pull/153) - Fix getFileExtension function. Fix image in `usage.md`. Fix small warnings and errors with updated language server. `def` has been added when necessary, `:` use instead of `,` in assertions, `_` added to variables not used in closures, `for` loop replaced by `.each{}`, remove unused code / input. +- [#161](https://github.com/nf-core/phaseimpute/pull/161) - Fix `VCF_SPLIT_BCFTOOLS` when only one sample is present by updating `BCFTOOLS_PLUGINSPLIT` and adding `BCFTOOLS_QUERY` to get the truth sample names used for renaming the resulting files. 
### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 693570cf..65d412dd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -88,7 +88,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}_${meta.batch}" } publishDir = [ path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, diff --git a/conf/steps/validation.config b/conf/steps/validation.config index cd261954..3a9b85cb 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -64,8 +64,23 @@ process { ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') } + // Compute sample files for renaming + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.args = '--list-samples' + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}_samples"} + ext.args2 = "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".truth\" }'" + publishDir = [enabled: false] + } + // Split by samples - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:.*' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { + ext.args = ["--output-type z", "--write-index=tbi"].join(' ') publishDir = [ path: { "${params.outdir}/validation/samples" }, mode: params.publish_dir_mode, @@ -73,12 +88,6 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { - ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}" } - ext.suffix = ".truth" - } - // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ @@ 
-106,6 +115,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { ext.args2 = "'(NR == 1) || (FNR > 1)'" // Skip header line ext.suffix = { "txt" } - tag = {"Test Quality"} + tag = {"${meta.id}"} } } diff --git a/modules.json b/modules.json index 6db896db..72f0a33e 100644 --- a/modules.json +++ b/modules.json @@ -46,10 +46,15 @@ }, "bcftools/pluginsplit": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "14c910af1f9c20c65e5df9325a1e4d3939d524d1", "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff" }, + "bcftools/query": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bcftools/stats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff index f06044e4..5e3092d8 100644 --- a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -3,43 +3,16 @@ Changes in module 'nf-core/bcftools/pluginsplit' Changes in 'bcftools/pluginsplit/main.nf': --- modules/nf-core/bcftools/pluginsplit/main.nf +++ modules/nf-core/bcftools/pluginsplit/main.nf -@@ -26,11 +26,17 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -+ def suffix = task.ext.suffix ?: "" +@@ -8,8 +8,7 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' }" - def samples_arg = samples ? "--samples-file ${samples}" : "" - def groups_arg = groups ? "--groups-file ${groups}" : "" - def regions_arg = regions ? "--regions-file ${regions}" : "" - def targets_arg = targets ? "--targets-file ${targets}" : "" -+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : -+ args.contains("--output-type u") || args.contains("-Ou") ? 
"bcf" : -+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : -+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : -+ "vcf" - - """ - bcftools plugin split \\ -@@ -42,7 +48,17 @@ - ${targets_arg} \\ - --output ${prefix} - -- mv ${prefix}/* . -+ for file in ${prefix}/*; do -+ # Extract the basename -+ base_name=\$(basename "\$file") -+ # Extract the part of the basename before the first dot -+ name_before_dot="\${base_name%%.*}" -+ # Extract the extension -+ extension="\${base_name#\${name_before_dot}}" -+ # Construct the new name -+ new_name="\${name_before_dot}${suffix}\${extension}" -+ mv "\$file" "./\$new_name" -+ done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": + input: +- tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*") +- path(samples) ++ tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) + path(groups) + path(regions) + path(targets) 'modules/nf-core/bcftools/pluginsplit/meta.yml' is unchanged 'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test' is unchanged diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 8b493087..d1977fbc 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -8,8 +8,7 @@ process BCFTOOLS_PLUGINSPLIT { 'biocontainers/bcftools:1.20--h8b25389_0' }" input: - tuple val(meta), path(vcf), path(tbi) - path(samples) + tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) path(groups) path(regions) path(targets) @@ -25,18 +24,11 @@ process BCFTOOLS_PLUGINSPLIT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "" def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" def regions_arg = regions ? 
"--regions-file ${regions}" : "" def targets_arg = targets ? "--targets-file ${targets}" : "" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" """ bcftools plugin split \\ @@ -46,19 +38,7 @@ process BCFTOOLS_PLUGINSPLIT { ${groups_arg} \\ ${regions_arg} \\ ${targets_arg} \\ - --output ${prefix} - - for file in ${prefix}/*; do - # Extract the basename - base_name=\$(basename "\$file") - # Extract the part of the basename before the first dot - name_before_dot="\${base_name%%.*}" - # Extract the extension - extension="\${base_name#\${name_before_dot}}" - # Construct the new name - new_name="\${name_before_dot}${suffix}\${extension}" - mv "\$file" "./\$new_name" - done + --output . cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -68,7 +48,6 @@ process BCFTOOLS_PLUGINSPLIT { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : @@ -81,11 +60,15 @@ process BCFTOOLS_PLUGINSPLIT { "" def determination_file = samples ?: targets def create_cmd = extension.matches("vcf|bcf") ? "touch " : "echo '' | gzip > " - def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" - def create_index = index.matches("csi|tbi") ? 
"cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt + while IFS= read -r filename; + do ${create_cmd} "./\$filename"; + if [ -n "${index}" ]; then + index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); + touch ./\$index_file; + fi; + done < files.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test index e3160851..e7ae574e 100644 --- a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test @@ -67,7 +67,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi") { - config "./nextflow.config" when { @@ -91,16 +90,43 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.vcf, - process.out.tbi.get(0).get(1).find { file(it).name.matches("normal.vcf.gz.tbi|tumor.vcf.gz.tbi") }, - ) } + process.out.vcf.collect{ it[1].collect { file(it).name } }, + process.out.tbi.collect{ it[1].collect { file(it).name } }, + ).match() } ) } } - test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { + test("homo_sapiens - [ vcf, tbi ], [], [], [], [], - error no sample") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { options "-stub" 
when { @@ -130,7 +156,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub") { - options "-stub" when { @@ -160,7 +185,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub") { - config "./nextflow.config" options "-stub" diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap index 66c3c1dd..b915b7cf 100644 --- a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap @@ -47,18 +47,39 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:42.307673651" + "timestamp": "2024-11-20T14:56:54.383979416" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi": { + "content": [ + [ + [ + "normal.vcf.gz", + "tumour.vcf.gz" + ] + ], + [ + [ + "normal.vcf.gz.tbi", + "tumour.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:44.796391578" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets": { "content": null, "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:21.498991402" + "timestamp": "2024-11-20T14:56:36.709842966" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub": { "content": [ @@ -126,10 +147,10 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:57:04.483688966" + "timestamp": "2024-11-20T14:57:11.163588435" }, "homo_sapiens - [ vcf, tbi ], samples, [], [], []": { "content": [ @@ -179,10 +200,10 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": 
"2024-07-09T15:56:10.033818589" + "timestamp": "2024-11-20T14:56:27.978161766" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub": { "content": [ @@ -232,9 +253,9 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:53.641165787" + "timestamp": "2024-11-20T14:57:02.456908152" } } \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..58019f4d --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,56 @@ +process BCFTOOLS_QUERY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? 
"--samples-file ${samples}" : "" + """ + bcftools query \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $args \\ + $vcf \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..279b3205 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,67 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be qeuried. + pattern: "*.{vcf.gz, vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
+ - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..39e67b35 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..3ead1f2c --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:44.916249758" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.txt", + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:49.932359271" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": 
[ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:39.930697926" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..da81c2a0 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/tags.yml b/modules/nf-core/bcftools/query/tests/tags.yml new file mode 100644 index 00000000..fb9455cb --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/query: + - "modules/nf-core/bcftools/query/**" diff --git a/nextflow.config b/nextflow.config index 3e402c96..5fb08a09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -103,6 +103,30 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// initialisation step +includeConfig 'conf/steps/initialisation.config' + +// chrcheck workflow +includeConfig 'conf/steps/chrcheck.config' + +// simulation step +includeConfig 'conf/steps/simulation.config' + +// panel_prep step +includeConfig 'conf/steps/panel_prep.config' + +// imputation step +includeConfig 'conf/steps/imputation_glimpse1.config' +includeConfig 'conf/steps/imputation_quilt.config' +includeConfig 'conf/steps/imputation_stitch.config' +includeConfig 'conf/steps/imputation_glimpse2.config' + +// validation step +includeConfig 'conf/steps/validation.config' + profiles { debug { dumpHashes = true @@ -322,27 +346,3 @@ validation { afterText = validation.help.afterText } } - -// Load 
modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// initialisation step -includeConfig 'conf/steps/initialisation.config' - -// chrcheck workflow -includeConfig 'conf/steps/chrcheck.config' - -// simulation step -includeConfig 'conf/steps/simulation.config' - -// panel_prep step -includeConfig 'conf/steps/panel_prep.config' - -// imputation step -includeConfig 'conf/steps/imputation_glimpse1.config' -includeConfig 'conf/steps/imputation_quilt.config' -includeConfig 'conf/steps/imputation_stitch.config' -includeConfig 'conf/steps/imputation_glimpse2.config' - -// validation step -includeConfig 'conf/steps/validation.config' diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf index 524b3e88..72832214 100644 --- a/subworkflows/local/vcf_split_bcftools/main.nf +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -2,13 +2,13 @@ include { BCFTOOLS_PLUGINSPLIT } from '../../../modules/nf-core/bcftools/plugin workflow VCF_SPLIT_BCFTOOLS { take: - ch_vcf // channel: [ [id, chr, tools], vcf, index ] + ch_vcf // channel: [ [id, chr, tools], vcf, index, samples ] main: ch_versions = Channel.empty() - BCFTOOLS_PLUGINSPLIT(ch_vcf, [], [], [], []) + BCFTOOLS_PLUGINSPLIT(ch_vcf, [], [], []) ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions.first()) ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test index 46c09660..b85b2be9 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/split" - test("Split vcf file") { + test("Split multiple vcf file - with renaming") { setup { run("BCFTOOLS_MERGE") { script "../../../../modules/nf-core/bcftools/merge/main.nf" @@ -23,7 +23,7 @@ nextflow_workflow { """ input[0] = 
Channel.of( [ - [id: "allSamples"], + [id: "allSamples.batch0"], [file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf", checkIfExist:true), file(params.pipelines_testdata_base_path + "hum_data/individuals/NA20359/NA20359.s_imputed.bcf", checkIfExist:true)], @@ -43,13 +43,14 @@ nextflow_workflow { } } when { - params { - max_cpus = 2 - max_memory = '2.GB' - } workflow { """ - input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi) + renaming_file = channel.of( + "NA12878\tNA12878_test NA12878.myfile", + "NA19401\t-\tNA19401", + "NA20359\tNA20359_2\tNA20359_3" + ).collectFile(name: "samples.txt", newLine: true) + input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi).combine(renaming_file) """ } } @@ -74,4 +75,56 @@ nextflow_workflow { ) } } + + test("Split one sample vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'NA12878'], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExists: true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExists: true), + [] // no samples (renaming) file + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] } + ).match() } + ) + } + } + + test("Split no sample vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'dbsnp_146.hg38'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", checkIfExists: true), + [] // no samples (renaming) file + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed 
}, + { assert workflow.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") + } + ) + } + } } diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap index d3bd681e..d6084e40 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "Split vcf file": { + "Split one sample vcf file": { "content": [ [ "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" @@ -11,6 +11,27 @@ }, "NA12878.vcf.gz", "NA12878.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T13:53:09.194659411" + }, + "Split multiple vcf file - with renaming": { + "content": [ + [ + "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" + ], + [ + [ + { + "id": "NA12878" + }, + "NA12878.myfile.vcf.gz", + "NA12878.myfile.vcf.gz.tbi" ], [ { @@ -21,10 +42,10 @@ ], [ { - "id": "NA20359" + "id": "NA20359_3" }, - "NA20359.vcf.gz", - "NA20359.vcf.gz.tbi" + "NA20359_3.vcf.gz", + "NA20359_3.vcf.gz.tbi" ] ], [ @@ -34,9 +55,9 @@ ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-11-07T14:13:06.801872176" + "timestamp": "2024-11-22T13:53:01.127637055" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_split_bcftools/tests/nextflow.config b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config index 523678dc..a2282fbf 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/nextflow.config +++ b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config @@ -1,7 +1,10 @@ process { + resourceLimits = [cpus: 2, memory: '2.GB'] + withName: BCFTOOLS_MERGE { ext.args = ["--write-index=tbi", "--output-type z"].join(' ') } + withName: BCFTOOLS_PLUGINSPLIT { ext.args = ["--write-index=tbi", "--output-type z"].join(' ') } diff --git a/workflows/phaseimpute/main.nf 
b/workflows/phaseimpute/main.nf index 4ca49bea..34ea9e40 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -65,6 +65,8 @@ include { BCFTOOLS_STATS as BCFTOOLS_STATS_TOOLS } from '../../modules/nf-co // Concordance subworkflows include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query' +include { GAWK } from '../../modules/nf-core/gawk' include { VCF_SPLIT_BCFTOOLS as SPLIT_TRUTH } from '../../subworkflows/local/vcf_split_bcftools' include { BCFTOOLS_STATS as BCFTOOLS_STATS_TRUTH } from '../../modules/nf-core/bcftools/stats' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' @@ -372,7 +374,7 @@ workflow PHASEIMPUTE { } // Split result by samples - VCF_SPLIT_BCFTOOLS(ch_input_validate) + VCF_SPLIT_BCFTOOLS(ch_input_validate.map{ [it[0], it[1], it[2], []] }) ch_input_validate = VCF_SPLIT_BCFTOOLS.out.vcf_tbi // Compute stats on imputed files @@ -443,8 +445,13 @@ workflow PHASEIMPUTE { CONCAT_TRUTH(ch_truth_vcf) ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + // Prepare renaming file: list truth sample names (BCFTOOLS_QUERY), format them with GAWK, join to the truth VCF by meta + BCFTOOLS_QUERY(CONCAT_TRUTH.out.vcf_tbi, [], [], []) + GAWK(BCFTOOLS_QUERY.out.output, []) + ch_pluginsplit = CONCAT_TRUTH.out.vcf_tbi.join(GAWK.out.output) + // Split truth vcf by samples - SPLIT_TRUTH(CONCAT_TRUTH.out.vcf_tbi) + SPLIT_TRUTH(ch_pluginsplit) ch_versions = ch_versions.mix(SPLIT_TRUTH.out.versions) // Compute stats on truth files diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap index 97871b8f..ed71c96a 100644 --- a/workflows/phaseimpute/tests/main.nf.test.snap +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -25,10 +25,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:47:19.203956394" + "timestamp": 
"2024-11-22T13:07:42.012993182" }, "Check test_all": { "content": [ @@ -167,9 +167,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-17T21:32:08.538026091" + "timestamp": "2024-11-22T13:58:54.188813202" }, "Check test_validate": { "content": [ @@ -191,9 +191,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-06T21:16:53.209347713" + "timestamp": "2024-11-22T13:54:09.435194577" }, "Check test_batch": { "content": [ @@ -269,9 +269,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-06T21:03:44.505303287" + "timestamp": "2024-11-22T14:06:57.642618122" }, "Check test_quilt": { "content": [ @@ -299,10 +299,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:46:11.131198832" + "timestamp": "2024-11-22T13:05:58.709941089" }, "Check test_sim": { "content": [ @@ -380,10 +380,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:44:41.049965503" + "timestamp": "2024-11-22T13:03:28.516026252" }, "Check test": { "content": [ @@ -463,9 +463,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:56:33.72923425" + "timestamp": "2024-11-22T13:16:12.803136748" } } \ No newline at end of file