From ec1f25b9678f0ad0971784ea873e1da10a1fcf1f Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:32:08 +0000 Subject: [PATCH 1/8] add nf-core pigz uncompress --- conf/modules.config | 5 ++ modules.json | 5 ++ modules/nf-core/pigz/uncompress/main.nf | 48 +++++++++++++++++++ modules/nf-core/pigz/uncompress/meta.yml | 42 ++++++++++++++++ .../pigz/uncompress/tests/main.nf.test | 33 +++++++++++++ .../pigz/uncompress/tests/main.nf.test.snap | 35 ++++++++++++++ .../nf-core/pigz/uncompress/tests/tags.yml | 2 + subworkflows/local/mirdeep2.nf | 7 +-- 8 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/pigz/uncompress/main.nf create mode 100644 modules/nf-core/pigz/uncompress/meta.yml create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test.snap create mode 100644 modules/nf-core/pigz/uncompress/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index f1992ea2..37a55c1a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -383,6 +383,11 @@ process { // // MIRDEEP // + + withName: 'PIGZ_UNCOMPRESS' { + tag = {"${meta.id}"} + } + withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_MAPPER' { publishDir = [ path: { "${params.outdir}/mirdeep2/mapper" }, diff --git a/modules.json b/modules.json index 5d7f9eeb..dd1c1a5e 100644 --- a/modules.json +++ b/modules.json @@ -45,6 +45,11 @@ "git_sha": "b80f5fd12ff7c43938f424dd76392a2704fa2396", "installed_by": ["modules"] }, + "pigz/uncompress": { + "branch": "master", + "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", + "installed_by": ["modules"] + }, "samtools/flagstat": { "branch": "master", "git_sha": "46eca555142d6e597729fcb682adcc791796f514", diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf new file mode 100644 index 00000000..11e43dff --- /dev/null +++ 
b/modules/nf-core/pigz/uncompress/main.nf @@ -0,0 +1,48 @@ +process PIGZ_UNCOMPRESS { + label 'process_low' + //stageInMode 'copy' // this directive can be set in case the original input should be kept + + conda "conda-forge::pigz" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(zip) + + output: + tuple val(meta), path("${uncompressed_filename}") , emit: file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + // calling pigz -f to make it follow symlinks + """ + unpigz \\ + -p $task.cpus \\ + -fk \\ + $args \\ + ${zip} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + """ + touch ${zip.dropRight(3)} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml new file mode 100644 index 00000000..c2d16cd4 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -0,0 +1,42 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "pigz_uncompress" +description: write your description here +keywords: + - uncompress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - zip: + type: file + description: Gzipped file + pattern: "*.{gzip}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - file: + type: file + description: File to compress + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test new file mode 100644 index 00000000..62ab27e2 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process PIGZ_UNCOMPRESS" + script "modules/nf-core/pigz/uncompress/main.nf" + process "PIGZ_UNCOMPRESS" + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/uncompress" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap new file mode 100644 index 00000000..126dd7d6 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ], + "file": [ + [ + { + "id": "test" + }, + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + 
}, + "timestamp": "2024-05-15T16:43:21.55056643" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml new file mode 100644 index 00000000..6719a90a --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/uncompress: + - modules/nf-core/pigz/uncompress/** diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index 76afa8a4..f83415a5 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -3,6 +3,7 @@ // include { MIRDEEP2_PIGZ } from '../../modules/local/mirdeep2_prepare' +include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' include { MIRDEEP2_MAPPER } from '../../modules/local/mirdeep2_mapper' include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run' @@ -17,10 +18,10 @@ workflow MIRDEEP2 { main: ch_versions = Channel.empty() - MIRDEEP2_PIGZ ( reads ) - ch_versions = ch_versions.mix(MIRDEEP2_PIGZ.out.versions.first()) + PIGZ_UNCOMPRESS ( reads ) + ch_versions = ch_versions.mix(PIGZ_UNCOMPRESS.out.versions.first()) - MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index ) + MIRDEEP2_MAPPER ( PIGZ_UNCOMPRESS.out.file, index ) ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) MIRDEEP2_RUN ( fasta.map{meta,file->file}, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) From c2b82b87ab13d427e82736d1846e4a1093815eb1 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:32:17 +0000 Subject: [PATCH 2/8] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44a03835..45c238e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#405]](https://github.com/nf-core/smrnaseq/pull/405) - Fix [Umicollapse algo wrong 
set](https://github.com/nf-core/smrnaseq/issues/404) - Fix potential bug in Umicollapse (not effective as we do not allow PE data in smrnaseq - but for consistency) - [[#420]](https://github.com/nf-core/smrnaseq/pull/420) - Fix [mirTrace produces an error in test nextflex](https://github.com/nf-core/smrnaseq/issues/419) - Allow config mode to be used in mirtrace/qc - [[#425]](https://github.com/nf-core/smrnaseq/pull/425) - Raise [minimum required NXF version for pipeline](https://github.com/nf-core/smrnaseq/issues/424) - usage of `arity` in some modules now requires this +- [[#426]](https://github.com/nf-core/smrnaseq/pull/426) - Add [nf-core mirtop](https://github.com/nf-core/smrnaseq/issues/417) - replace local `mirtop` +- [[#427]](https://github.com/nf-core/smrnaseq/pull/427) - Add [nf-core pigz uncompress](https://github.com/nf-core/smrnaseq/issues/422) - replace local `mirdeep_pigz` ## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch From 6090c0fb810976516150b811a23a859dd526dafa Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:18:09 +0000 Subject: [PATCH 3/8] copy staged files --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 37a55c1a..cad31ce3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -386,6 +386,7 @@ process { withName: 'PIGZ_UNCOMPRESS' { tag = {"${meta.id}"} + stageInMode = 'copy' } withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_MAPPER' { From 41b5cbfd37a8023cee6973a65a69074d7ecf1e6d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:18:20 +0000 Subject: [PATCH 4/8] remove unused local module --- modules/local/mirdeep2_prepare.nf | 31 ------------------------------- subworkflows/local/mirdeep2.nf | 1 - 2 files changed, 32 deletions(-) delete mode 100644 modules/local/mirdeep2_prepare.nf diff --git a/modules/local/mirdeep2_prepare.nf 
b/modules/local/mirdeep2_prepare.nf deleted file mode 100644 index ce66b9f1..00000000 --- a/modules/local/mirdeep2_prepare.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MIRDEEP2_PIGZ { - label 'process_low' - tag "$meta.id" - - // TODO maybe create a mulled container and uncompress within mirdeep2_mapper? - conda 'bioconda::bioconvert=1.1.1' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' : - 'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.{fastq,fq}"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - pigz -f -d -p $task.cpus $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - -} diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index f83415a5..40dd9f29 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -2,7 +2,6 @@ // Quantify mirna with bowtie and mirtop // -include { MIRDEEP2_PIGZ } from '../../modules/local/mirdeep2_prepare' include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' include { MIRDEEP2_MAPPER } from '../../modules/local/mirdeep2_mapper' include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run' From b54447f235120eea13afeb9df48204659ca07ed9 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:23:03 +0000 Subject: [PATCH 5/8] remove unused local mirtop --- modules/local/mirtop_quant.nf | 43 ----------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 modules/local/mirtop_quant.nf diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf deleted file mode 100644 index cdb6d32b..00000000 --- 
a/modules/local/mirtop_quant.nf +++ /dev/null @@ -1,43 +0,0 @@ -process MIRTOP_QUANT { - label 'process_medium' - - conda 'mirtop=0.4.25 bioconda::samtools=1.20 conda-forge::r-base=4.1.1 conda-forge::r-data.table=1.14.2 python=3.7' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' : - 'biocontainers/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' }" - - input: - path ("bams/*") - path hairpin - path gtf - val mirtrace_species - - output: - path "mirtop/mirtop.gff" , emit: mirtop_gff - path "mirtop/mirtop.tsv" , emit: mirtop_table - path "mirtop/mirtop_rawData.tsv", emit: mirtop_rawdata - path "mirtop/stats/*" , emit: logs - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def filter_species = params.mirgenedb ? params.mirgenedb_species : mirtrace_species - """ - #Cleanup the GTF if mirbase html form is broken - GTF="$gtf" - sed 's/&gt;/>/g' \$GTF | sed 's#
<br>#\\n#g' | sed 's#</p>##g' | sed 's#<p>
##g' | sed -e :a -e '/^\\n*\$/{\$d;N;};/\\n\$/ba' > \${GTF}_html_cleaned.gtf - mirtop gff --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species ./bams/* - mirtop counts --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species --add-extra --gff mirtop/mirtop.gff - mirtop export --format isomir --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf --sps $filter_species -o mirtop mirtop/mirtop.gff - mirtop stats mirtop/mirtop.gff --out mirtop/stats - mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //') - END_VERSIONS - """ - -} From 9a24f8dbd3c2a357418ddbbce1f8709fd0bcbf9e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:30:30 +0000 Subject: [PATCH 6/8] remove unused local mirtop quant --- conf/modules.config | 9 +-------- subworkflows/local/mirna_quant.nf | 1 - 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0452d94b..5dc3c678 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -329,14 +329,7 @@ process { enabled: params.save_aligned_mirna_quant ] } - withName: 'MIRTOP_QUANT' { - publishDir = [ - //mirtop already part of the output folder - path: { "${params.outdir}/mirna_quant/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + // Mirtop diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index e8a99a4c..cbca7956 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -19,7 +19,6 @@ include { BAM_SORT_STATS_SAMTOOLS as BAM_STATS_MATURE BAM_SORT_STATS_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_stats_samtools' include { SEQCLUSTER_COLLAPSE } from '../../modules/nf-core/seqcluster/collapse/main' -include { MIRTOP_QUANT } from '../../modules/local/mirtop_quant.nf' include { TABLE_MERGE } from '../../modules/local/datatable_merge/datatable_merge.nf' include { EDGER_QC } from '../../modules/local/edger_qc/edger_qc.nf' include { BAM_STATS_MIRNA_MIRTOP } from '../../subworkflows/nf-core/bam_stats_mirna_mirtop/main' From 21b552222dc0a59a9eea8935fa74e88835684226 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:20:48 +0000 Subject: [PATCH 7/8] save intermediates in tests --- conf/test_contamination_tech_reps.config | 3 +-- conf/test_mirgenedb.config | 3 +-- conf/test_skipfastp.config | 3 +-- conf/test_technical_repeats.config | 3 +-- conf/test_umi.config | 7 ++++--- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/conf/test_contamination_tech_reps.config b/conf/test_contamination_tech_reps.config index ffcfafb6..f164bde4 100644 --- a/conf/test_contamination_tech_reps.config +++ b/conf/test_contamination_tech_reps.config @@ -24,8 +24,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_technical_repeats_short.csv' mirtrace_species = 'hsa' - save_merged = false - save_aligned_mirna_quant = false + save_intermediates = true skip_multiqc = true skip_mirdeep = true diff --git a/conf/test_mirgenedb.config b/conf/test_mirgenedb.config index 8bd1c46a..ea7babb4 100644 --- a/conf/test_mirgenedb.config +++ b/conf/test_mirgenedb.config @@ -31,8 +31,7 @@ params { 
mirgenedb_species = "Hsa" skip_mirdeep = true - save_merged = false - save_aligned_mirna_quant = false + save_intermediates = true } diff --git a/conf/test_skipfastp.config b/conf/test_skipfastp.config index 6ccd228d..82f2f2f8 100644 --- a/conf/test_skipfastp.config +++ b/conf/test_skipfastp.config @@ -29,8 +29,7 @@ params { mirtrace_species = 'hsa' skip_mirdeep = true skip_fastp = true - save_merged = false - save_aligned_mirna_quant = false + save_intermediates = true } // Do not include any additional config so it defaults to protocol custom diff --git a/conf/test_technical_repeats.config b/conf/test_technical_repeats.config index b462a5ad..2c75e9bc 100644 --- a/conf/test_technical_repeats.config +++ b/conf/test_technical_repeats.config @@ -26,8 +26,7 @@ params { mirtrace_species = 'hsa' skip_mirdeep = true - save_merged = false - save_aligned_mirna_quant = false + save_intermediates = true skip_fastqc = true skip_multiqc = true diff --git a/conf/test_umi.config b/conf/test_umi.config index 4cb1be2b..16bc7256 100644 --- a/conf/test_umi.config +++ b/conf/test_umi.config @@ -30,9 +30,10 @@ params { //UMI Specific testcase with_umi = true - umitools_extract_method = 'regex' - umitools_bc_pattern = '.+(?P<discard_1>AACTGTAGGCACCATCAAT){s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)' - save_umi_intermeds = true + umitools_extract_method = 'regex' + umitools_bc_pattern = '.+(?P<discard_1>AACTGTAGGCACCATCAAT){s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)' + save_umi_intermeds = true + save_intermediates = true } // Include illumina config to run test without additional profiles From a41a338d0de09ecaf69c6905c70dda0c6b5154ed Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:36:04 +0000 Subject: [PATCH 8/8] save intermediates --- conf/test_nextflex.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_nextflex.config b/conf/test_nextflex.config index 6967c933..4db4a2d3 100644 --- a/conf/test_nextflex.config +++ b/conf/test_nextflex.config @@ -28,6 +28,7 @@ params {
mirtrace_species = 'hsa' skip_mirdeep = true + save_intermediates = true }