From 4106d8e11ef11fe49f029bf4a1a60ef9bc3bab4d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:18:52 +0000 Subject: [PATCH 01/10] refactor: add pivot functions --- bin/pivot_longer.R | 29 ++++++++++++++++++ bin/pivot_wider.R | 32 ++++++++++++++++++++ modules/local/pivot/longer/environment.yml | 5 ++++ modules/local/pivot/longer/main.nf | 29 ++++++++++++++++++ modules/local/pivot/wider/environment.yml | 5 ++++ modules/local/pivot/wider/main.nf | 35 ++++++++++++++++++++++ 6 files changed, 135 insertions(+) create mode 100755 bin/pivot_longer.R create mode 100755 bin/pivot_wider.R create mode 100644 modules/local/pivot/longer/environment.yml create mode 100644 modules/local/pivot/longer/main.nf create mode 100644 modules/local/pivot/wider/environment.yml create mode 100644 modules/local/pivot/wider/main.nf diff --git a/bin/pivot_longer.R b/bin/pivot_longer.R new file mode 100755 index 00000000..7b84fa4d --- /dev/null +++ b/bin/pivot_longer.R @@ -0,0 +1,29 @@ +#!/usr/bin/env Rscript + +library(optparse) +library(tidyr) +library(vroom) + +option_list <- list( + make_option(c("--input"), type = "character", help = "Input TSV file", metavar = "character"), + make_option(c("--output"), type = "character", help = "Output CSV file", metavar = "character") +) + +opt_parser <- OptionParser(option_list = option_list) +opt <- parse_args(opt_parser) + +# Read the tab-separated input with vroom, keeping every column as character +data <- vroom::vroom(opt$input, delim = "\t", col_types = c(.default = "c")) + +last_col <- names(data)[ncol(data)] + +# Convert from wide to long format: the last column's name becomes Sample_ID and its values become Counts +long_data <- data %>% + pivot_longer( + cols = all_of(last_col), + names_to = "Sample_ID", + values_to = "Counts" + ) + +vroom_write(long_data, opt$output, delim = ",") + diff --git a/bin/pivot_wider.R b/bin/pivot_wider.R new file mode 100755 index 00000000..a61c37be --- /dev/null +++ b/bin/pivot_wider.R @@ -0,0 +1,32 @@ +#!/usr/bin/env Rscript + +library(optparse) +library(tidyr) +library(vroom)
+library(dplyr) + +option_list <- list( + make_option(c("--input"), type = "character", help = "Input CSV file in long format", metavar = "character"), + make_option(c("--output"), type = "character", help = "Output CSV file in wide format", metavar = "character") +) + +opt_parser <- OptionParser(option_list = option_list) +opt <- parse_args(opt_parser) + +# Read CSV with vroom +long_data <- vroom::vroom(opt$input, delim = ",", + col_types = c( + Counts = "d", + .default = "c" + )) + +# Transform to wide format +wide_data <- long_data %>% + pivot_wider( + names_from = Sample_ID, + values_from = Counts, + values_fill = 0 + ) + +# Export wide format +vroom_write(wide_data, opt$output, delim = "\t") diff --git a/modules/local/pivot/longer/environment.yml b/modules/local/pivot/longer/environment.yml new file mode 100644 index 00000000..379e91b5 --- /dev/null +++ b/modules/local/pivot/longer/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge +dependencies: + - conda-forge::r-optparse + - conda-forge::r-tidyverse diff --git a/modules/local/pivot/longer/main.nf b/modules/local/pivot/longer/main.nf new file mode 100644 index 00000000..c4d8a14a --- /dev/null +++ b/modules/local/pivot/longer/main.nf @@ -0,0 +1,29 @@ +process PIVOT_LONGER { + tag"$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1" + + input: + tuple val(meta), path(tsv) + + output: + tuple val(meta), path("*_long.csv") , emit: csv + path "versions.yml" , emit: versions + + script: + """ + pivot_longer.R \\ + --input ${tsv} \\ + --output ${meta.id}_long.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))") + optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ 
+ +} diff --git a/modules/local/pivot/wider/environment.yml b/modules/local/pivot/wider/environment.yml new file mode 100644 index 00000000..379e91b5 --- /dev/null +++ b/modules/local/pivot/wider/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge +dependencies: + - conda-forge::r-optparse + - conda-forge::r-tidyverse diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf new file mode 100644 index 00000000..b693eabb --- /dev/null +++ b/modules/local/pivot/wider/main.nf @@ -0,0 +1,35 @@ +process PIVOT_WIDER { + tag"$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1" + + input: + tuple val(meta), path(csvs) + + output: + tuple val(meta), path("*joined_samples_mirtop.csv") , emit: csv + path "versions.yml" , emit: versions + + script: + """ + awk 'NR == 1 || FNR > 1' ${csvs.join(' ')} > final_long_results_temp.csv + + pivot_wider.R \\ + --input final_long_results_temp.csv \\ + --output ${meta.id}_concatenated_temp.csv + + sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))") + dplyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('dplyr')))") + optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))") + vroom: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('vroom')))") + END_VERSIONS + """ + +} From e093df4b926a40c845a826cd0d895a417d075534 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:20:55 +0000 Subject: [PATCH 02/10] refactor: add more resources to wider process --- modules/local/pivot/wider/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf index b693eabb..46aa6a32 100644 --- a/modules/local/pivot/wider/main.nf +++ b/modules/local/pivot/wider/main.nf @@ -1,6 +1,6 @@ process PIVOT_WIDER { tag"$meta.id" - label 'process_single' + label 'process_high' conda "${moduleDir}/environment.yml" container "community.wave.seqera.io/library/r-optparse_r-tidyverse_r-vroom:3cbb224fea84a0e1" From 32b34a16c98f158d5f846a2f1b7f49fef36b3823 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:33:53 +0000 Subject: [PATCH 03/10] refactor: add config to pivot modules --- conf/modules.config | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2abe9be6..8c99520d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -468,9 +468,11 @@ process { ] } - withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:CSVTK_JOIN' { - ext.args = "--fields 'UID,Read,miRNA,Variant,iso_5p,iso_3p,iso_add3p,iso_snp,iso_5p_nt,iso_3p_nt,iso_add3p_nt,iso_snp_nt' --tabs --outer-join --na \"0\" --out-delimiter \"\t\"" - ext.prefix = "joined_samples_mirtop" + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:PIVOT_LONGER' { + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:PIVOT_WIDER' { publishDir = [ path: { "${params.outdir}/mirna_quant/mirtop" }, mode: params.publish_dir_mode, From aa8f24b764e321632fb8dd2052a0b402c8b99936 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:34:55 +0000 Subject: [PATCH 04/10] refactor: add pivot modules to mirna_quant --- subworkflows/local/mirna_quant.nf | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index f2157a47..fbdd623c 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -24,6 
+24,9 @@ include { EDGER_QC } from '../../modules/local/edger_qc/main' include { BAM_STATS_MIRNA_MIRTOP } from '../../subworkflows/nf-core/bam_stats_mirna_mirtop/main' include { CSVTK_JOIN } from '../../modules/nf-core/csvtk/join/main' +include { PIVOT_LONGER } from '../../modules/local/pivot/longer/main' +include { PIVOT_WIDER } from '../../modules/local/pivot/wider/main' + workflow MIRNA_QUANT { take: ch_reference_mature // channel: [ val(meta), fasta file] @@ -105,10 +108,20 @@ workflow MIRNA_QUANT { .collect{it[1]} .map{it -> return [[id:"TSVs"], it]} - CSVTK_JOIN ( ch_tsvs ) - ch_versions = ch_versions.mix(CSVTK_JOIN.out.versions) + PIVOT_LONGER( BAM_STATS_MIRNA_MIRTOP.out.counts ) + ch_versions = ch_versions.mix(PIVOT_LONGER.out.versions) + + ch_long_files = PIVOT_LONGER.out.csv + .map { meta, file -> file } + .collect() + .map { files -> + return [[id: "pivoted_files"], files] + } + + PIVOT_WIDER( ch_long_files ) + ch_versions = ch_versions.mix(PIVOT_WIDER.out.versions) - DATATABLE_MERGE ( CSVTK_JOIN.out.csv ) + DATATABLE_MERGE ( PIVOT_WIDER.out.csv ) ch_versions = ch_versions.mix(DATATABLE_MERGE.out.versions) ch_reads_genome = BOWTIE_MAP_HAIRPIN.out.fastq From d84f81024a5ce373fafb9f4ffe3bb9f3565349b6 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:03:05 +0000 Subject: [PATCH 05/10] fix: library names, output extension --- modules/local/pivot/longer/main.nf | 4 ++-- modules/local/pivot/wider/main.nf | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/pivot/longer/main.nf b/modules/local/pivot/longer/main.nf index c4d8a14a..05ba879c 100644 --- a/modules/local/pivot/longer/main.nf +++ b/modules/local/pivot/longer/main.nf @@ -21,8 +21,8 @@ process PIVOT_LONGER { cat <<-END_VERSIONS > versions.yml "${task.process}": r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - tidyr: \$(Rscript -e "library(limma); 
cat(as.character(packageVersion('tidyr')))") - optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))") + tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") END_VERSIONS """ diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf index 46aa6a32..3b2a7333 100644 --- a/modules/local/pivot/wider/main.nf +++ b/modules/local/pivot/wider/main.nf @@ -9,8 +9,8 @@ process PIVOT_WIDER { tuple val(meta), path(csvs) output: - tuple val(meta), path("*joined_samples_mirtop.csv") , emit: csv - path "versions.yml" , emit: versions + tuple val(meta), path("*joined_samples_mirtop.tsv") , emit: csv + path "versions.yml" , emit: versions script: """ @@ -25,10 +25,10 @@ process PIVOT_WIDER { cat <<-END_VERSIONS > versions.yml "${task.process}": r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - tidyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('tidyr')))") - dplyr: \$(Rscript -e "library(limma); cat(as.character(packageVersion('dplyr')))") - optparse: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('optparse')))") - vroom: \$(Rscript -e "library(edgeR); cat(as.character(packageVersion('vroom')))") + tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + vroom: \$(Rscript -e "library(vroom); cat(as.character(packageVersion('vroom')))") END_VERSIONS """ From 6c21e4c3ad4b8f665ed47ae711439270a5f75a14 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:06:41 +0000 Subject: [PATCH 06/10] refactor: rename meta channel --- subworkflows/local/mirna_quant.nf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index fbdd623c..4d5b5192 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -115,7 +115,7 @@ workflow MIRNA_QUANT { .map { meta, file -> file } .collect() .map { files -> - return [[id: "pivoted_files"], files] + return [[id: "Long_Files"], files] } PIVOT_WIDER( ch_long_files ) From 33943b1ab721bf21fa69c34a194f46795354ac0a Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:07:02 +0000 Subject: [PATCH 07/10] test: update tests succeeded size --- tests/test_contamination_tech_reps.nf.test | 2 +- tests/test_mirgenedb.nf.test | 2 +- tests/test_nextflex.nf.test | 2 +- tests/test_skipfastp.nf.test | 2 +- tests/test_umi.nf.test | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_contamination_tech_reps.nf.test b/tests/test_contamination_tech_reps.nf.test index 02266078..010514e1 100644 --- a/tests/test_contamination_tech_reps.nf.test +++ b/tests/test_contamination_tech_reps.nf.test @@ -20,7 +20,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 100 }, + { assert workflow.trace.succeeded().size() == 103 }, { assert snapshot( path("$outputDir/contaminant_filter/filter/Clone1_N1_trimmed.contamination_mqc.yaml").exists(), //TODO see if we can make these deterministic or why they are non-deterministic diff --git a/tests/test_mirgenedb.nf.test b/tests/test_mirgenedb.nf.test index 4e08158d..93bb3666 100644 --- a/tests/test_mirgenedb.nf.test +++ b/tests/test_mirgenedb.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 104 }, 
+ { assert workflow.trace.succeeded().size() == 107 }, { assert workflow.trace.failed().size() == 1 }, { assert snapshot( diff --git a/tests/test_nextflex.nf.test b/tests/test_nextflex.nf.test index 4330c2b0..99b03842 100644 --- a/tests/test_nextflex.nf.test +++ b/tests/test_nextflex.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 79 }, + { assert workflow.trace.succeeded().size() == 82 }, { assert snapshot( path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.idxstats"), diff --git a/tests/test_skipfastp.nf.test b/tests/test_skipfastp.nf.test index eb4a0456..4537aabf 100644 --- a/tests/test_skipfastp.nf.test +++ b/tests/test_skipfastp.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 64 }, + { assert workflow.trace.succeeded().size() == 66 }, { assert snapshot( path("$outputDir/mirna_quant/mirtop/joined_samples_mirtop.tsv").exists(), diff --git a/tests/test_umi.nf.test b/tests/test_umi.nf.test index e2c4cff5..b7948758 100644 --- a/tests/test_umi.nf.test +++ b/tests/test_umi.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 74 }, + { assert workflow.trace.succeeded().size() == 76 }, { assert snapshot( path("$outputDir/mirna_quant/bam/mature/SRX8195118_SRR11631014_mature.sorted.stats"), From 955961a1962b3b419fd02cf612bd1234ebee2300 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:45:14 +0000 Subject: [PATCH 08/10] fix: add tsv instead of csv --- modules/local/pivot/wider/main.nf 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/pivot/wider/main.nf b/modules/local/pivot/wider/main.nf index 3b2a7333..62b0e9e3 100644 --- a/modules/local/pivot/wider/main.nf +++ b/modules/local/pivot/wider/main.nf @@ -20,7 +20,7 @@ process PIVOT_WIDER { --input final_long_results_temp.csv \\ --output ${meta.id}_concatenated_temp.csv - sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.csv + sort -t\$'\t' -k1,1 ${meta.id}_concatenated_temp.csv > joined_samples_mirtop.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": From 6e67071675a4a5de8bcdd9b27991e5ef6e5cadb9 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:45:23 +0000 Subject: [PATCH 09/10] test: update tests snapshots --- .../test_contamination_tech_reps.nf.test.snap | 20 +++++++++---------- tests/test_mirgenedb.nf.test.snap | 6 +++--- tests/test_nextflex.nf.test.snap | 8 ++++---- tests/test_skipfastp.nf.test.snap | 8 ++++---- tests/test_umi.nf.test.snap | 8 ++++---- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/test_contamination_tech_reps.nf.test.snap b/tests/test_contamination_tech_reps.nf.test.snap index 106bc8ed..01dfb805 100644 --- a/tests/test_contamination_tech_reps.nf.test.snap +++ b/tests/test_contamination_tech_reps.nf.test.snap @@ -34,13 +34,13 @@ }, "software_versions": { "content": [ - "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, 
FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, Workflow={nf-core/smrnaseq=v2.4.0}}" + "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, 
SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, Workflow={nf-core/smrnaseq=v2.4.0}}" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T23:16:26.853242481" + "timestamp": "2024-12-10T00:29:32.052341" }, "mirna_quant_bam": { "content": [ @@ -65,9 +65,9 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-01T20:06:04.974546479" + "timestamp": "2024-12-10T00:29:32.116301175" }, "mirna_quant_edger_qc": { "content": [ @@ -90,9 +90,9 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-01T20:06:05.025175321" + "timestamp": "2024-12-10T00:29:32.164075991" }, "contaminant_filter_filter": { "content": [ @@ -113,8 +113,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-01T20:06:05.070939602" + "timestamp": "2024-12-10T00:29:32.208602197" } -} +} \ No newline at end of file diff --git a/tests/test_mirgenedb.nf.test.snap b/tests/test_mirgenedb.nf.test.snap index 9ed11a97..f8da0a9d 100644 --- a/tests/test_mirgenedb.nf.test.snap +++ b/tests/test_mirgenedb.nf.test.snap @@ -19,13 +19,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, 
INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.10.0" + "nextflow": "24.10.2" }, - "timestamp": "2024-11-11T13:44:14.583324793" + "timestamp": "2024-12-10T00:35:18.448206326" }, "mirna_quant_bam": { "content": [ diff --git 
a/tests/test_nextflex.nf.test.snap b/tests/test_nextflex.nf.test.snap index dfc54c7f..c4a3209b 100644 --- a/tests/test_nextflex.nf.test.snap +++ b/tests/test_nextflex.nf.test.snap @@ -34,13 +34,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}" + "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, 
SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T23:25:57.880948228" + "timestamp": "2024-12-10T00:37:47.333537716" }, "mirna_quant_bam": { "content": [ @@ -142,4 +142,4 @@ }, "timestamp": "2024-09-20T17:11:24.369706104" } -} +} \ No newline at end of file diff --git a/tests/test_skipfastp.nf.test.snap b/tests/test_skipfastp.nf.test.snap index 2352aaf1..56d83a64 100644 --- a/tests/test_skipfastp.nf.test.snap +++ b/tests/test_skipfastp.nf.test.snap @@ -41,13 +41,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, 
INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.4.0}}" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T23:28:49.241105443" + "timestamp": "2024-12-10T00:40:10.829696529" }, "mirna_quant_bam": { "content": [ @@ -142,4 +142,4 @@ }, "timestamp": "2024-10-01T20:19:25.557700049" } -} +} \ No newline at end of file diff --git a/tests/test_umi.nf.test.snap b/tests/test_umi.nf.test.snap index fb0b6d09..bf9933a9 100644 --- a/tests/test_umi.nf.test.snap +++ b/tests/test_umi.nf.test.snap @@ -41,13 +41,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, 
SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.4.0}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, PIVOT_LONGER={r-base=4.4.2, tidyr=1.3.1, optparse=1.7.5}, PIVOT_WIDER={r-base=4.4.2, tidyr=1.3.1, dplyr=1.1.4, optparse=1.7.5, vroom=1.6.5}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.4.0}}" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.2" }, - "timestamp": "2024-10-08T23:34:54.715037951" + "timestamp": "2024-12-10T00:44:45.433524507" }, "mirna_quant_bam": { "content": [ @@ -160,4 +160,4 @@ }, "timestamp": "2024-09-20T19:12:28.290360163" } -} +} \ No newline at end of file From eab8687a53582e44646234f2c7411cde0b0e8472 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:22:59 +0000 Subject: [PATCH 10/10] docs: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f25e7a40..78517362 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#481]](https://github.com/nf-core/smrnaseq/pull/481) - Fix [MIRTOP_STATS IndexError](https://github.com/nf-core/smrnaseq/issues/477) - Fix mirtop process execution when mirgenedb is used. - [[#482]](https://github.com/nf-core/smrnaseq/pull/482) - Update documentation regarding MirgeneDB input files. +- [[#486]](https://github.com/nf-core/smrnaseq/pull/486) - Replace `CSVTK_JOIN` with `PIVOT_LONGER` and `PIVOT_WIDER` to improve processing of large numbers of files. ## v2.4.0 - 2024-10-14 - Navy Iron Boxer