diff --git a/modules/nf-core/bcftools/mpileup/environment.yml b/modules/nf-core/bcftools/mpileup/environment.yml new file mode 100644 index 00000000..114390be --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_mpileup +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf new file mode 100644 index 00000000..5e57644a --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -0,0 +1,58 @@ +process BCFTOOLS_MPILEUP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(bam), path(intervals) + tuple val(meta2), path(fasta) + val save_mpileup + + output: + tuple val(meta), path("*vcf.gz") , emit: vcf + tuple val(meta), path("*vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*stats.txt") , emit: stats + tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" + def bgzip_mpileup = save_mpileup ? "bgzip ${prefix}.mpileup" : "" + def intervals = intervals ? "-T ${intervals}" : "" + """ + echo "${meta.id}" > sample_name.list + + bcftools \\ + mpileup \\ + --fasta-ref $fasta \\ + $args \\ + $bam \\ + $intervals \\ + $mpileup \\ + | bcftools call --output-type v $args2 \\ + | bcftools reheader --samples sample_name.list \\ + | bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3 + + $bgzip_mpileup + + tabix -p vcf -f ${prefix}.vcf.gz + + bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml new file mode 100644 index 00000000..65410ddd --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -0,0 +1,70 @@ +name: bcftools_mpileup +description: Compresses VCF files +keywords: + - variant calling + - mpileup + - VCF +tools: + - mpileup: + description: | + Generates genotype likelihoods at each genomic position with coverage. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - intervals: + type: file + description: Input intervals file. A file (commonly '.bed') containing regions to subset + - meta: + type: map + description: | + Groovy Map containing information about the genome fasta, e.g. [ id: 'sarscov2' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - save_mpileup: + type: boolean + description: Save mpileup file generated by bcftools mpileup +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF gzipped output file + pattern: "*.{vcf.gz}" + - tbi: + type: file + description: tabix index file + pattern: "*.{vcf.gz.tbi}" + - stats: + type: file + description: Text output file containing stats + pattern: "*{stats.txt}" + - mpileup: + type: file + description: mpileup gzipped output for all positions + pattern: "{*.mpileup.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test b/modules/nf-core/bcftools/mpileup/tests/main.nf.test new file mode 100644 index 00000000..6478bbc2 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process BCFTOOLS_MPILEUP" + script "../main.nf" + process "BCFTOOLS_MPILEUP" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/mpileup" + + config "./nextflow.config" + + test("sarscov2 - [bam, []], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.tbi, + process.out.stats, + process.out.mpileup, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.tbi, + process.out.stats, + process.out.mpileup, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.tbi, + process.out.stats, + process.out.mpileup, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap new file mode 100644 index 00000000..ef80ab1b --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "sarscov2 - [bam, []], fasta, true": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,0f2f2c8488e97e7f13979380d5d3b6b5" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,34cb2eeb73f4d2b98218acecebd92704" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,a988fbcd2ea5d1ce30970dcb60a77ed7" + ] + ], + [ + [ + { + "id": "test" + }, + "test.mpileup.gz:md5,73b4a00398bddab2cd065b40d17ca4dc" + ] + ], + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "timestamp": "2023-11-29T14:11:54.549517279" + }, + "sarscov2 - [bam, bed], fasta, false": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,687244dbf71d05b3b973ab08ecf05310" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,3785df15f3d7faf35f3ad70d167a50f7" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,f8c5ab149c4bf0e5f51c518346cb87b5" + ] + ], + [ + + ], + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "timestamp": "2023-11-29T14:12:00.865439661" + }, + "sarscov2 - [bam, []], fasta, false": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,0f2f2c8488e97e7f13979380d5d3b6b5" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,34cb2eeb73f4d2b98218acecebd92704" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,a988fbcd2ea5d1ce30970dcb60a77ed7" + ] + ], + [ + + ], + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "timestamp": "2023-11-29T14:11:47.814900494" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/nextflow.config b/modules/nf-core/bcftools/mpileup/tests/nextflow.config new file mode 100644 index 00000000..a7ba19fe --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args2 = '--no-version --ploidy 1 --multiallelic-caller' + ext.args3 = '--no-version' +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/tags.yml b/modules/nf-core/bcftools/mpileup/tests/tags.yml new file mode 100644 index 00000000..07b91f98 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/mpileup: + - "modules/nf-core/bcftools/mpileup/**"