From 4106d8e11ef11fe49f029bf4a1a60ef9bc3bab4d Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 6 Dec 2024 20:18:52 +0000
Subject: [PATCH 01/10] refactor: add pivot functions

---
 bin/pivot_longer.R                         | 29 ++++++++++++++++++
 bin/pivot_wider.R                          | 32 ++++++++++++++++++++
 modules/local/pivot/longer/environment.yml |  5 ++++
 modules/local/pivot/longer/main.nf         | 29 ++++++++++++++++++
 modules/local/pivot/wider/environment.yml  |  5 ++++
 modules/local/pivot/wider/main.nf          | 35 ++++++++++++++++++++++
 6 files changed, 135 insertions(+)
 create mode 100755 bin/pivot_longer.R
 create mode 100755 bin/pivot_wider.R
 create mode 100644 modules/local/pivot/longer/environment.yml
 create mode 100644 modules/local/pivot/longer/main.nf
 create mode 100644 modules/local/pivot/wider/environment.yml
 create mode 100644 modules/local/pivot/wider/main.nf

diff --git a/bin/pivot_longer.R b/bin/pivot_longer.R
new file mode 100755
index 00000000..7b84fa4d
--- /dev/null
+++ b/bin/pivot_longer.R
@@ -0,0 +1,29 @@
+#!/usr/bin/env Rscript
+# Reshape a wide TSV into a long CSV by pivoting its last column.
+library(optparse)
+library(tidyr)
+library(vroom)
+
+option_list <- list(
+    make_option(c("--input"), type = "character", help = "Input TSV file", metavar = "character"),
+    make_option(c("--output"), type = "character", help = "Output CSV file", metavar = "character")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+stopifnot(!is.null(opt$input), !is.null(opt$output)) # both options are required
+
+# Read the tab-separated input with vroom; every column is kept as character
+wide_data <- vroom::vroom(opt$input, delim = "\t", col_types = c(.default = "c"))
+
+# Only the last column is pivoted; its header becomes the Sample_ID value
+sample_col <- names(wide_data)[ncol(wide_data)]
+
+# Wide -> long. all_of() replaces the deprecated bare external vector in `cols`
+long_data <- wide_data %>%
+    pivot_longer(
+        cols = all_of(sample_col),
+        names_to = "Sample_ID",
+        values_to = "Counts"
+    )
+
+vroom_write(long_data, opt$output, delim = ",")
diff --git a/bin/pivot_wider.R b/bin/pivot_wider.R
new file mode 100755
index 00000000..a61c37be
--- /dev/null
+++ b/bin/pivot_wider.R
@@ -0,0 +1,32 @@
+#!/usr/bin/env Rscript
+# Pivot a long-format counts CSV into a wide TSV with one column per Sample_ID.
+library(optparse)
+library(tidyr)
+library(vroom)
+library(dplyr)
+
+option_list <- list(
+    make_option(c("--input"), type = "character", help = "Input CSV file in long format", metavar = "character"),
+    make_option(c("--output"), type = "character", help = "Output TSV file in wide format", metavar = "character")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+stopifnot(!is.null(opt$input), !is.null(opt$output)) # both options are required
+
+# Read CSV with vroom: Counts is parsed as double, all other columns as character
+long_data <- vroom::vroom(
+    opt$input,
+    delim = ",",
+    col_types = c(Counts = "d", .default = "c")
+)
+
+# Transform to wide format; combinations absent from the input are filled with 0
+wide_data <- long_data %>%
+    pivot_wider(
+        names_from = Sample_ID,
+        values_from = Counts,
+        values_fill = 0
+    )
+
+# Export wide format as tab-separated text
+vroom_write(wide_data, opt$output, delim = "\t")
diff --git a/modules/local/pivot/longer/environment.yml b/modules/local/pivot/longer/environment.yml
new file mode 100644
index 00000000..379e91b5
--- /dev/null
+++ b/modules/local/pivot/longer/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+dependencies:
+  - conda-forge::r-optparse
+  - conda-forge::r-tidyverse
diff --git a/modules/local/pivot/longer/main.nf b/modules/local/pivot/longer/main.nf
new file mode 100644
index 00000000..c4d8a14a
--- /dev/null
+++ b/modules/local/pivot/longer/main.nf
@@ -0,0 +1,29 @@
+process PIVOT_LONGER {
+    tag"$meta.id"
+    label 'process_single'
+    // Runs pivot_longer.R on one TSV and writes <meta.id>_long.csv plus versions.yml
+    conda "${moduleDir}/environment.yml"
+    container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1"
+
+    input:
+    tuple val(meta), path(tsv)
+
+    output:
+    tuple val(meta), path("*_long.csv") , emit: csv
+    path "versions.yml"                 , emit: versions
+
+    script:
+    """
+    pivot_longer.R \\
+        --input ${tsv} \\
+        --output ${meta.id}_long.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))")
+        optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))")
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/local/pivot/wider/environment.yml b/modules/local/pivot/wider/environment.yml
new file mode 100644
index 00000000..379e91b5
--- /dev/null
+++ b/modules/local/pivot/wider/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+dependencies:
+  - conda-forge::r-optparse
+  - conda-forge::r-tidyverse
diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf
new file mode 100644
index 00000000..b693eabb
--- /dev/null
+++ b/modules/local/pivot/wider/main.nf
@@ -0,0 +1,35 @@
+process PIVOT_WIDER {
+    tag"$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1"
+    // Concatenates the long CSVs, pivots to wide, then sorts; NOTE(review): sort includes the header row
+    input:
+    tuple val(meta), path(csvs)
+
+    output:
+    tuple val(meta), path("*joined_samples_mirtop.csv") , emit: csv
+    path "versions.yml"                         , emit: versions
+
+    script:
+    """
+    awk 'NR == 1 || FNR > 1' ${csvs.join(' ')} > final_long_results_temp.csv
+
+    pivot_wider.R \\
+        --input final_long_results_temp.csv \\
+        --output ${meta.id}_concatenated_temp.csv
+
+    sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))")
+        dplyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('dplyr')))")
+        optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))")
+        vroom: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('vroom')))")
+    END_VERSIONS
+    """
+
+}

From e093df4b926a40c845a826cd0d895a417d075534 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 6 Dec 2024 20:20:55 +0000
Subject: [PATCH 02/10] refactor: add more resources to wider process

---
 modules/local/pivot/wider/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf
index b693eabb..46aa6a32 100644
--- a/modules/local/pivot/wider/main.nf
+++ b/modules/local/pivot/wider/main.nf
@@ -1,6 +1,6 @@
 process PIVOT_WIDER {
     tag"$meta.id"
-    label 'process_single'
+    label 'process_high'
 
     conda "${moduleDir}/environment.yml"
     container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1"

From 32b34a16c98f158d5f846a2f1b7f49fef36b3823 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 6 Dec 2024 20:33:53 +0000
Subject: [PATCH 03/10] refactor: add config to pivot modules

---
 conf/modules.config | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 2abe9be6..8c99520d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -468,9 +468,11 @@ process {
         ]
     }
 
-    withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:CSVTK_JOIN' {
-        ext.args = "--fields 'UID,Read,miRNA,Variant,iso_5p,iso_3p,iso_add3p,iso_snp,iso_5p_nt,iso_3p_nt,iso_add3p_nt,iso_snp_nt' --tabs --outer-join --na \"0\" --out-delimiter \"\t\""
-        ext.prefix = "joined_samples_mirtop"
+    withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:PIVOT_LONGER' {
+        publishDir = [ enabled: false ]
+    }
+
+    withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:PIVOT_WIDER' {
         publishDir = [
             path: { "${params.outdir}/mirna_quant/mirtop" },
             mode: params.publish_dir_mode,

From aa8f24b764e321632fb8dd2052a0b402c8b99936 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Fri, 6 Dec 2024 20:34:55 +0000
Subject: [PATCH 04/10] refactor: add pivot modules to mirna_quant

---
 subworkflows/local/mirna_quant.nf | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
index f2157a47..fbdd623c 100644
--- a/subworkflows/local/mirna_quant.nf
+++ b/subworkflows/local/mirna_quant.nf
@@ -24,6 +24,9 @@ include { EDGER_QC               } from '../../modules/local/edger_qc/main'
 include { BAM_STATS_MIRNA_MIRTOP } from '../../subworkflows/nf-core/bam_stats_mirna_mirtop/main'
 include { CSVTK_JOIN             } from '../../modules/nf-core/csvtk/join/main'
 
+include { PIVOT_LONGER           } from '../../modules/local/pivot/longer/main'
+include { PIVOT_WIDER            } from '../../modules/local/pivot/wider/main'
+
 workflow MIRNA_QUANT {
     take:
     ch_reference_mature  // channel: [ val(meta), fasta file]
@@ -105,10 +108,20 @@ workflow MIRNA_QUANT {
         .collect{it[1]}
         .map{it -> return [[id:"TSVs"], it]}
 
-    CSVTK_JOIN ( ch_tsvs )
-    ch_versions = ch_versions.mix(CSVTK_JOIN.out.versions)
+    PIVOT_LONGER( BAM_STATS_MIRNA_MIRTOP.out.counts )
+    ch_versions = ch_versions.mix(PIVOT_LONGER.out.versions)
+
+    ch_long_files = PIVOT_LONGER.out.csv
+        .map { meta, file -> file }
+        .collect()
+        .map { files ->
+            return [[id: "pivoted_files"], files]
+        }
+
+    PIVOT_WIDER( ch_long_files )
+    ch_versions = ch_versions.mix(PIVOT_WIDER.out.versions)
 
-    DATATABLE_MERGE ( CSVTK_JOIN.out.csv )
+    DATATABLE_MERGE ( PIVOT_WIDER.out.csv )
     ch_versions = ch_versions.mix(DATATABLE_MERGE.out.versions)
 
     ch_reads_genome = BOWTIE_MAP_HAIRPIN.out.fastq

From d84f81024a5ce373fafb9f4ffe3bb9f3565349b6 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 10 Dec 2024 00:03:05 +0000
Subject: [PATCH 05/10] fix: library names, output extension

---
 modules/local/pivot/longer/main.nf |  4 ++--
 modules/local/pivot/wider/main.nf  | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/local/pivot/longer/main.nf b/modules/local/pivot/longer/main.nf
index c4d8a14a..05ba879c 100644
--- a/modules/local/pivot/longer/main.nf
+++ b/modules/local/pivot/longer/main.nf
@@ -21,8 +21,8 @@ process PIVOT_LONGER {
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
-        tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))")
-        optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))")
+        tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
     END_VERSIONS
     """
 
diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf
index 46aa6a32..3b2a7333 100644
--- a/modules/local/pivot/wider/main.nf
+++ b/modules/local/pivot/wider/main.nf
@@ -9,8 +9,8 @@ process PIVOT_WIDER {
     tuple val(meta), path(csvs)
 
     output:
-    tuple val(meta), path("*joined_samples_mirtop.csv") , emit: csv
-    path "versions.yml"                         , emit: versions
+    tuple val(meta), path("*joined_samples_mirtop.tsv") , emit: csv
+    path "versions.yml"                                 , emit: versions
 
     script:
     """
@@ -25,10 +25,10 @@ process PIVOT_WIDER {
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
-        tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))")
-        dplyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('dplyr')))")
-        optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))")
-        vroom: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('vroom')))")
+        tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
+        vroom: \$(Rscript -e "library(vroom); cat(as.character(packageVersion('vroom')))")
     END_VERSIONS
     """
 

From 6c21e4c3ad4b8f665ed47ae711439270a5f75a14 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 10 Dec 2024 00:06:41 +0000
Subject: [PATCH 06/10] refactor: rename meta channel

---
 subworkflows/local/mirna_quant.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
index fbdd623c..4d5b5192 100644
--- a/subworkflows/local/mirna_quant.nf
+++ b/subworkflows/local/mirna_quant.nf
@@ -115,7 +115,7 @@ workflow MIRNA_QUANT {
         .map { meta, file -> file }
         .collect()
         .map { files ->
-            return [[id: "pivoted_files"], files]
+            return [[id: "Long_Files"], files]
         }
 
     PIVOT_WIDER( ch_long_files )

From 33943b1ab721bf21fa69c34a194f46795354ac0a Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 10 Dec 2024 00:07:02 +0000
Subject: [PATCH 07/10] test: update tests succeeded size

---
 tests/test_contamination_tech_reps.nf.test | 2 +-
 tests/test_mirgenedb.nf.test               | 2 +-
 tests/test_nextflex.nf.test                | 2 +-
 tests/test_skipfastp.nf.test               | 2 +-
 tests/test_umi.nf.test                     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_contamination_tech_reps.nf.test b/tests/test_contamination_tech_reps.nf.test
index 02266078..010514e1 100644
--- a/tests/test_contamination_tech_reps.nf.test
+++ b/tests/test_contamination_tech_reps.nf.test
@@ -20,7 +20,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
-                { assert workflow.trace.succeeded().size() == 100 },
+                { assert workflow.trace.succeeded().size() == 103 },
 
                 { assert snapshot(
                     path("$outputDir/contaminant_filter/filter/Clone1_N1_trimmed.contamination_mqc.yaml").exists(), //TODO see if we can make these deterministic or why they are non-deterministic
diff --git a/tests/test_mirgenedb.nf.test b/tests/test_mirgenedb.nf.test
index 4e08158d..93bb3666 100644
--- a/tests/test_mirgenedb.nf.test
+++ b/tests/test_mirgenedb.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
-                { assert workflow.trace.succeeded().size() == 104 },
+                { assert workflow.trace.succeeded().size() == 107 },
                 { assert workflow.trace.failed().size() == 1 },
 
                 { assert snapshot(
diff --git a/tests/test_nextflex.nf.test b/tests/test_nextflex.nf.test
index 4330c2b0..99b03842 100644
--- a/tests/test_nextflex.nf.test
+++ b/tests/test_nextflex.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
-                { assert workflow.trace.succeeded().size() == 79 },
+                { assert workflow.trace.succeeded().size() == 82 },
 
                 { assert snapshot(
                     path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.idxstats"),
diff --git a/tests/test_skipfastp.nf.test b/tests/test_skipfastp.nf.test
index eb4a0456..4537aabf 100644
--- a/tests/test_skipfastp.nf.test
+++ b/tests/test_skipfastp.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
-                { assert workflow.trace.succeeded().size() == 64 },
+                { assert workflow.trace.succeeded().size() == 66 },
 
                 { assert snapshot(
                     path("$outputDir/mirna_quant/mirtop/joined_samples_mirtop.tsv").exists(),
diff --git a/tests/test_umi.nf.test b/tests/test_umi.nf.test
index e2c4cff5..b7948758 100644
--- a/tests/test_umi.nf.test
+++ b/tests/test_umi.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {
             assertAll(
                 { assert workflow.success },
                 { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
-                { assert workflow.trace.succeeded().size() == 74 },
+                { assert workflow.trace.succeeded().size() == 76 },
 
                 { assert snapshot(
                     path("$outputDir/mirna_quant/bam/mature/SRX8195118_SRR11631014_mature.sorted.stats"),

From 955961a1962b3b419fd02cf612bd1234ebee2300 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 10 Dec 2024 00:45:14 +0000
Subject: [PATCH 08/10] fix: add tsv instead of csv

---
 modules/local/pivot/wider/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf
index 3b2a7333..62b0e9e3 100644
--- a/modules/local/pivot/wider/main.nf
+++ b/modules/local/pivot/wider/main.nf
@@ -20,7 +20,7 @@ process PIVOT_WIDER {
         --input final_long_results_temp.csv \\
         --output ${meta.id}_concatenated_temp.csv
 
-    sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.csv
+    sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.tsv
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 6e67071675a4a5de8bcdd9b27991e5ef6e5cadb9 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 10 Dec 2024 00:45:23 +0000
Subject: [PATCH 09/10] test: update tests snapshots

---
 .../test_contamination_tech_reps.nf.test.snap | 20 +++++++++----------
 tests/test_mirgenedb.nf.test.snap             |  6 +++---
 tests/test_nextflex.nf.test.snap              |  8 ++++----
 tests/test_skipfastp.nf.test.snap             |  8 ++++----
 tests/test_umi.nf.test.snap                   |  8 ++++----
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/test_contamination_tech_reps.nf.test.snap b/tests/test_contamination_tech_reps.nf.test.snap
index 106bc8ed..01dfb805 100644
--- a/tests/test_contamination_tech_reps.nf.test.snap
+++ b/tests/test_contamination_tech_reps.nf.test.snap
@@ -34,13 +34,13 @@
     },
     "software_versions": {
         "content": [
-            "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
+            "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-08T23:16:26.853242481"
+        "timestamp": "2024-12-10T00:29:32.052341"
     },
     "mirna_quant_bam": {
         "content": [
@@ -65,9 +65,9 @@
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-01T20:06:04.974546479"
+        "timestamp": "2024-12-10T00:29:32.116301175"
     },
     "mirna_quant_edger_qc": {
         "content": [
@@ -90,9 +90,9 @@
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-01T20:06:05.025175321"
+        "timestamp": "2024-12-10T00:29:32.164075991"
     },
     "contaminant_filter_filter": {
         "content": [
@@ -113,8 +113,8 @@
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-01T20:06:05.070939602"
+        "timestamp": "2024-12-10T00:29:32.208602197"
     }
-}
+}
\ No newline at end of file
diff --git a/tests/test_mirgenedb.nf.test.snap b/tests/test_mirgenedb.nf.test.snap
index 9ed11a97..f8da0a9d 100644
--- a/tests/test_mirgenedb.nf.test.snap
+++ b/tests/test_mirgenedb.nf.test.snap
@@ -19,13 +19,13 @@
     },
     "software_versions": {
         "content": [
-            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
+            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.10.0"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-11-11T13:44:14.583324793"
+        "timestamp": "2024-12-10T00:35:18.448206326"
     },
     "mirna_quant_bam": {
         "content": [
diff --git a/tests/test_nextflex.nf.test.snap b/tests/test_nextflex.nf.test.snap
index dfc54c7f..c4a3209b 100644
--- a/tests/test_nextflex.nf.test.snap
+++ b/tests/test_nextflex.nf.test.snap
@@ -34,13 +34,13 @@
     },
     "software_versions": {
         "content": [
-            "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}"
+            "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}"
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-08T23:25:57.880948228"
+        "timestamp": "2024-12-10T00:37:47.333537716"
     },
     "mirna_quant_bam": {
         "content": [
@@ -142,4 +142,4 @@
         },
         "timestamp": "2024-09-20T17:11:24.369706104"
     }
-}
+}
\ No newline at end of file
diff --git a/tests/test_skipfastp.nf.test.snap b/tests/test_skipfastp.nf.test.snap
index 2352aaf1..56d83a64 100644
--- a/tests/test_skipfastp.nf.test.snap
+++ b/tests/test_skipfastp.nf.test.snap
@@ -41,13 +41,13 @@
     },
     "software_versions": {
         "content": [
-            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}"
+            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}"
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-08T23:28:49.241105443"
+        "timestamp": "2024-12-10T00:40:10.829696529"
     },
     "mirna_quant_bam": {
         "content": [
@@ -142,4 +142,4 @@
         },
         "timestamp": "2024-10-01T20:19:25.557700049"
     }
-}
+}
\ No newline at end of file
diff --git a/tests/test_umi.nf.test.snap b/tests/test_umi.nf.test.snap
index fb0b6d09..bf9933a9 100644
--- a/tests/test_umi.nf.test.snap
+++ b/tests/test_umi.nf.test.snap
@@ -41,13 +41,13 @@
     },
     "software_versions": {
         "content": [
-            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.4.0}}"
+            "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.4.0}}"
         ],
         "meta": {
             "nf-test": "0.9.0",
-            "nextflow": "24.04.4"
+            "nextflow": "24.10.2"
         },
-        "timestamp": "2024-10-08T23:34:54.715037951"
+        "timestamp": "2024-12-10T00:44:45.433524507"
     },
     "mirna_quant_bam": {
         "content": [
@@ -160,4 +160,4 @@
         },
         "timestamp": "2024-09-20T19:12:28.290360163"
     }
-}
+}
\ No newline at end of file

From eab8687a53582e44646234f2c7411cde0b0e8472 Mon Sep 17 00:00:00 2001
From: atrigila <18577080+atrigila@users.noreply.github.com>
Date: Thu, 12 Dec 2024 17:22:59 +0000
Subject: [PATCH 10/10] docs: update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f25e7a40..78517362 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [[#481]](https://github.com/nf-core/smrnaseq/pull/481) - Fix [MIRTOP_STATS IndexError](https://github.com/nf-core/smrnaseq/issues/477) - Fix mirtop process execution when mirgenedb is used.
 - [[#482]](https://github.com/nf-core/smrnaseq/pull/482) - Update documentation regarding MirgeneDB input files.
+- [[#486]](https://github.com/nf-core/smrnaseq/pull/486) - Replace `CSVTK_JOIN` with the local `PIVOT_LONGER` and `PIVOT_WIDER` modules to improve processing of large numbers of files.
 
 ## v2.4.0 - 2024-10-14 - Navy Iron Boxer