From ec1f25b9678f0ad0971784ea873e1da10a1fcf1f Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 13:32:08 +0000
Subject: [PATCH 1/8] add nf-core pigz uncompress
---
conf/modules.config | 5 ++
modules.json | 5 ++
modules/nf-core/pigz/uncompress/main.nf | 48 +++++++++++++++++++
modules/nf-core/pigz/uncompress/meta.yml | 42 ++++++++++++++++
.../pigz/uncompress/tests/main.nf.test | 33 +++++++++++++
.../pigz/uncompress/tests/main.nf.test.snap | 35 ++++++++++++++
.../nf-core/pigz/uncompress/tests/tags.yml | 2 +
subworkflows/local/mirdeep2.nf | 7 +--
8 files changed, 174 insertions(+), 3 deletions(-)
create mode 100644 modules/nf-core/pigz/uncompress/main.nf
create mode 100644 modules/nf-core/pigz/uncompress/meta.yml
create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test
create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test.snap
create mode 100644 modules/nf-core/pigz/uncompress/tests/tags.yml
diff --git a/conf/modules.config b/conf/modules.config
index f1992ea2..37a55c1a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -383,6 +383,11 @@ process {
//
// MIRDEEP
//
+
+ withName: 'PIGZ_UNCOMPRESS' {
+ tag = {"${meta.id}"}
+ }
+
withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_MAPPER' {
publishDir = [
path: { "${params.outdir}/mirdeep2/mapper" },
diff --git a/modules.json b/modules.json
index 5d7f9eeb..dd1c1a5e 100644
--- a/modules.json
+++ b/modules.json
@@ -45,6 +45,11 @@
"git_sha": "b80f5fd12ff7c43938f424dd76392a2704fa2396",
"installed_by": ["modules"]
},
+ "pigz/uncompress": {
+ "branch": "master",
+ "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7",
+ "installed_by": ["modules"]
+ },
"samtools/flagstat": {
"branch": "master",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf
new file mode 100644
index 00000000..11e43dff
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/main.nf
@@ -0,0 +1,48 @@
+process PIGZ_UNCOMPRESS {
+ label 'process_low'
+ //stageInMode 'copy' // this directive can be set in case the original input should be kept
+
+ conda "conda-forge::pigz"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8':
+ 'biocontainers/pigz:2.8' }"
+
+ input:
+ tuple val(meta), path(zip)
+
+ output:
+ tuple val(meta), path("${uncompressed_filename}") , emit: file
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ uncompressed_filename = zip.toString() - '.gz'
+ // unpigz -f is needed so a symlinked (staged) input is followed; -k keeps the compressed original
+ """
+ unpigz \\
+ -p $task.cpus \\
+ -fk \\
+ $args \\
+ ${zip}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+ END_VERSIONS
+ """
+
+ stub:
+ // stub: create an empty placeholder named exactly like the declared `file` output
+ uncompressed_filename = zip.toString() - '.gz'
+ """
+ touch ${uncompressed_filename}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml
new file mode 100644
index 00000000..c2d16cd4
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/meta.yml
@@ -0,0 +1,42 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "pigz_uncompress"
+description: Uncompress gzipped files using pigz (unpigz)
+keywords:
+ - uncompress
+ - gzip
+ - parallelized
+tools:
+ - "pigz":
+ description: "Parallel implementation of the gzip algorithm."
+ homepage: "https://zlib.net/pigz/"
+ documentation: "https://zlib.net/pigz/pigz.pdf"
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test']`
+ - zip:
+ type: file
+ description: Gzipped file
+ pattern: "*.gz"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test']`
+ - file:
+ type: file
+ description: Uncompressed file
+ pattern: "*"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+authors:
+ - "@lrauschning"
diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test
new file mode 100644
index 00000000..62ab27e2
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_process {
+
+ name "Test Process PIGZ_UNCOMPRESS"
+ script "modules/nf-core/pigz/uncompress/main.nf"
+ process "PIGZ_UNCOMPRESS"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "pigz"
+ tag "pigz/uncompress"
+
+ test("Should run without failures") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assert snapshot(process.out).match()
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap
new file mode 100644
index 00000000..126dd7d6
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683"
+ ],
+ "file": [
+ [
+ {
+ "id": "test"
+ },
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-05-15T16:43:21.55056643"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml
new file mode 100644
index 00000000..6719a90a
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/tags.yml
@@ -0,0 +1,2 @@
+pigz/uncompress:
+ - modules/nf-core/pigz/uncompress/**
diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index 76afa8a4..f83415a5 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -3,6 +3,7 @@
//
include { MIRDEEP2_PIGZ } from '../../modules/local/mirdeep2_prepare'
+include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main'
include { MIRDEEP2_MAPPER } from '../../modules/local/mirdeep2_mapper'
include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run'
@@ -17,10 +18,10 @@ workflow MIRDEEP2 {
main:
ch_versions = Channel.empty()
- MIRDEEP2_PIGZ ( reads )
- ch_versions = ch_versions.mix(MIRDEEP2_PIGZ.out.versions.first())
+ PIGZ_UNCOMPRESS ( reads )
+ ch_versions = ch_versions.mix(PIGZ_UNCOMPRESS.out.versions.first())
- MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index )
+ MIRDEEP2_MAPPER ( PIGZ_UNCOMPRESS.out.file, index )
ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first())
MIRDEEP2_RUN ( fasta.map{meta,file->file}, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
From c2b82b87ab13d427e82736d1846e4a1093815eb1 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 13:32:17 +0000
Subject: [PATCH 2/8] update changelog
---
CHANGELOG.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44a03835..45c238e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#405]](https://github.com/nf-core/smrnaseq/pull/405) - Fix [Umicollapse algo wrong set](https://github.com/nf-core/smrnaseq/issues/404) - Fix potential bug in Umicollapse (not effective as we do not allow PE data in smrnaseq - but for consistency)
- [[#420]](https://github.com/nf-core/smrnaseq/pull/420) - Fix [mirTrace produces an error in test nextflex](https://github.com/nf-core/smrnaseq/issues/419) - Allow config mode to be used in mirtrace/qc
- [[#425]](https://github.com/nf-core/smrnaseq/pull/425) - Raise [minimum required NXF version for pipeline](https://github.com/nf-core/smrnaseq/issues/424) - usage of `arity` in some modules now requires this
+- [[#426]](https://github.com/nf-core/smrnaseq/pull/426) - Add [nf-core mirtop](https://github.com/nf-core/smrnaseq/issues/417) - replace local `mirtop`
+- [[#427]](https://github.com/nf-core/smrnaseq/pull/427) - Add [nf-core pigz uncompress](https://github.com/nf-core/smrnaseq/issues/422) - replace local `mirdeep2_pigz`
## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch
From 6090c0fb810976516150b811a23a859dd526dafa Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:18:09 +0000
Subject: [PATCH 3/8] copy staged files
---
conf/modules.config | 1 +
1 file changed, 1 insertion(+)
diff --git a/conf/modules.config b/conf/modules.config
index 37a55c1a..cad31ce3 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -386,6 +386,7 @@ process {
withName: 'PIGZ_UNCOMPRESS' {
tag = {"${meta.id}"}
+ stageInMode = 'copy'
}
withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_MAPPER' {
From 41b5cbfd37a8023cee6973a65a69074d7ecf1e6d Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:18:20 +0000
Subject: [PATCH 4/8] remove unused local module
---
modules/local/mirdeep2_prepare.nf | 31 -------------------------------
subworkflows/local/mirdeep2.nf | 1 -
2 files changed, 32 deletions(-)
delete mode 100644 modules/local/mirdeep2_prepare.nf
diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf
deleted file mode 100644
index ce66b9f1..00000000
--- a/modules/local/mirdeep2_prepare.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process MIRDEEP2_PIGZ {
- label 'process_low'
- tag "$meta.id"
-
- // TODO maybe create a mulled container and uncompress within mirdeep2_mapper?
- conda 'bioconda::bioconvert=1.1.1'
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' :
- 'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }"
-
- input:
- tuple val(meta), path(reads)
-
- output:
- tuple val(meta), path("*.{fastq,fq}"), emit: reads
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- """
- pigz -f -d -p $task.cpus $reads
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
- END_VERSIONS
- """
-
-}
diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index f83415a5..40dd9f29 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -2,7 +2,6 @@
// Quantify mirna with bowtie and mirtop
//
-include { MIRDEEP2_PIGZ } from '../../modules/local/mirdeep2_prepare'
include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main'
include { MIRDEEP2_MAPPER } from '../../modules/local/mirdeep2_mapper'
include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run'
From b54447f235120eea13afeb9df48204659ca07ed9 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:23:03 +0000
Subject: [PATCH 5/8] remove unused local mirtop
---
modules/local/mirtop_quant.nf | 43 -----------------------------------
1 file changed, 43 deletions(-)
delete mode 100644 modules/local/mirtop_quant.nf
diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf
deleted file mode 100644
index cdb6d32b..00000000
--- a/modules/local/mirtop_quant.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process MIRTOP_QUANT {
- label 'process_medium'
-
- conda 'mirtop=0.4.25 bioconda::samtools=1.20 conda-forge::r-base=4.1.1 conda-forge::r-data.table=1.14.2 python=3.7'
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' :
- 'biocontainers/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' }"
-
- input:
- path ("bams/*")
- path hairpin
- path gtf
- val mirtrace_species
-
- output:
- path "mirtop/mirtop.gff" , emit: mirtop_gff
- path "mirtop/mirtop.tsv" , emit: mirtop_table
- path "mirtop/mirtop_rawData.tsv", emit: mirtop_rawdata
- path "mirtop/stats/*" , emit: logs
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def filter_species = params.mirgenedb ? params.mirgenedb_species : mirtrace_species
- """
- #Cleanup the GTF if mirbase html form is broken
- GTF="$gtf"
- sed 's/&gt;/>/g' \$GTF | sed 's#<br>#\\n#g' | sed 's#</p>##g' | sed -e :a -e '/^\\n*\$/{\$d;N;};/\\n\$/ba' > \${GTF}_html_cleaned.gtf
- mirtop gff --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species ./bams/*
- mirtop counts --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species --add-extra --gff mirtop/mirtop.gff
- mirtop export --format isomir --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf --sps $filter_species -o mirtop mirtop/mirtop.gff
- mirtop stats mirtop/mirtop.gff --out mirtop/stats
- mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //')
- END_VERSIONS
- """
-
-}
From 9a24f8dbd3c2a357418ddbbce1f8709fd0bcbf9e Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:30:30 +0000
Subject: [PATCH 6/8] remove unused local mirtop quant
---
conf/modules.config | 9 +--------
subworkflows/local/mirna_quant.nf | 1 -
2 files changed, 1 insertion(+), 9 deletions(-)
diff --git a/conf/modules.config b/conf/modules.config
index 0452d94b..5dc3c678 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -329,14 +329,7 @@ process {
enabled: params.save_aligned_mirna_quant
]
}
- withName: 'MIRTOP_QUANT' {
- publishDir = [
- //mirtop already part of the output folder
- path: { "${params.outdir}/mirna_quant/" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
+
// Mirtop
diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
index e8a99a4c..cbca7956 100644
--- a/subworkflows/local/mirna_quant.nf
+++ b/subworkflows/local/mirna_quant.nf
@@ -19,7 +19,6 @@ include { BAM_SORT_STATS_SAMTOOLS as BAM_STATS_MATURE
BAM_SORT_STATS_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_stats_samtools'
include { SEQCLUSTER_COLLAPSE } from '../../modules/nf-core/seqcluster/collapse/main'
-include { MIRTOP_QUANT } from '../../modules/local/mirtop_quant.nf'
include { TABLE_MERGE } from '../../modules/local/datatable_merge/datatable_merge.nf'
include { EDGER_QC } from '../../modules/local/edger_qc/edger_qc.nf'
include { BAM_STATS_MIRNA_MIRTOP } from '../../subworkflows/nf-core/bam_stats_mirna_mirtop/main'
From 21b552222dc0a59a9eea8935fa74e88835684226 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 17 Sep 2024 17:20:48 +0000
Subject: [PATCH 7/8] save intermediates in tests
---
conf/test_contamination_tech_reps.config | 3 +--
conf/test_mirgenedb.config | 3 +--
conf/test_skipfastp.config | 3 +--
conf/test_technical_repeats.config | 3 +--
conf/test_umi.config | 7 ++++---
5 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/conf/test_contamination_tech_reps.config b/conf/test_contamination_tech_reps.config
index ffcfafb6..f164bde4 100644
--- a/conf/test_contamination_tech_reps.config
+++ b/conf/test_contamination_tech_reps.config
@@ -24,8 +24,7 @@ params {
input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_technical_repeats_short.csv'
mirtrace_species = 'hsa'
- save_merged = false
- save_aligned_mirna_quant = false
+ save_intermediates = true
skip_multiqc = true
skip_mirdeep = true
diff --git a/conf/test_mirgenedb.config b/conf/test_mirgenedb.config
index 8bd1c46a..ea7babb4 100644
--- a/conf/test_mirgenedb.config
+++ b/conf/test_mirgenedb.config
@@ -31,8 +31,7 @@ params {
mirgenedb_species = "Hsa"
skip_mirdeep = true
- save_merged = false
- save_aligned_mirna_quant = false
+ save_intermediates = true
}
diff --git a/conf/test_skipfastp.config b/conf/test_skipfastp.config
index 6ccd228d..82f2f2f8 100644
--- a/conf/test_skipfastp.config
+++ b/conf/test_skipfastp.config
@@ -29,8 +29,7 @@ params {
mirtrace_species = 'hsa'
skip_mirdeep = true
skip_fastp = true
- save_merged = false
- save_aligned_mirna_quant = false
+ save_intermediates = true
}
// Do not include any additional config so it defaults to protocol custom
diff --git a/conf/test_technical_repeats.config b/conf/test_technical_repeats.config
index b462a5ad..2c75e9bc 100644
--- a/conf/test_technical_repeats.config
+++ b/conf/test_technical_repeats.config
@@ -26,8 +26,7 @@ params {
mirtrace_species = 'hsa'
skip_mirdeep = true
- save_merged = false
- save_aligned_mirna_quant = false
+ save_intermediates = true
skip_fastqc = true
skip_multiqc = true
diff --git a/conf/test_umi.config b/conf/test_umi.config
index 4cb1be2b..16bc7256 100644
--- a/conf/test_umi.config
+++ b/conf/test_umi.config
@@ -30,9 +30,10 @@ params {
//UMI Specific testcase
with_umi = true
- umitools_extract_method = 'regex'
- umitools_bc_pattern = '.+(?P