From 4bd903a372d437c022c2c5d491158dd2952b238e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 1 Oct 2024 19:49:33 +0000 Subject: [PATCH 1/2] move clean fasta --- subworkflows/local/prepare_genome/main.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 9dc2be02..bf1da219 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -74,6 +74,11 @@ workflow PREPARE_GENOME { // even if bowtie index is specified, there still needs to be a fasta. // without fasta, no genome analysis. if(val_fasta) { + // Clean fasta (replace non-ATCGs with Ns, remove whitespaces from headers) + CLEAN_FASTA ( ch_fasta ) + ch_versions = ch_versions.mix(CLEAN_FASTA.out.versions) + ch_fasta = CLEAN_FASTA.out.output + //Prepare bowtie index, unless specified //This needs to be done here as the index is used by GENOME_QUANT if(val_bowtie_index) { @@ -94,16 +99,12 @@ workflow PREPARE_GENOME { } } else { - // Clean fasta (replace non-ATCGs with Ns, remove whitespaces from headers) - CLEAN_FASTA ( ch_fasta ) - ch_versions = ch_versions.mix(CLEAN_FASTA.out.versions) // Index FASTA with nf-core Bowtie1 INDEX_GENOME ( CLEAN_FASTA.out.output ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) // Set channels: clean fasta and its index - ch_fasta = CLEAN_FASTA.out.output ch_bowtie_index = INDEX_GENOME.out.index.collect() } } From 1f9922a700bab39c55e0ba8adf5d09cf6e327327 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 1 Oct 2024 20:29:16 +0000 Subject: [PATCH 2/2] update tests and changelog --- CHANGELOG.md | 1 + .../test_contamination_tech_reps.nf.test.snap | 18 +++++++++--------- tests/test_mirgenedb.nf.test.snap | 4 ++-- tests/test_nextflex.nf.test.snap | 4 ++-- tests/test_skipfastp.nf.test | 2 +- tests/test_skipfastp.nf.test.snap | 12 ++++++------ tests/test_umi.nf.test | 2 +- tests/test_umi.nf.test.snap | 10 +++++----- 8 files changed, 27 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71891856..0df0234a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#447]](https://github.com/nf-core/smrnaseq/pull/447) - Fix [Minor fixes and general pipeline cleanup](https://github.com/nf-core/smrnaseq/issues/400) - Update variable and processes names, update channel comments, remove unused modules and params. - [[#448]](https://github.com/nf-core/smrnaseq/pull/448) - Migrate local mirdeep to [nf-core mirdeep2 modules and subworkflow](https://github.com/nf-core/smrnaseq/issues/443) and generate [test profile for mirdeep2](https://github.com/nf-core/smrnaseq/issues/399). - [[#452]](https://github.com/nf-core/smrnaseq/pull/452) - Fix [Fix ch_bowtie_index channel structure](https://github.com/nf-core/smrnaseq/issues/451) and replace untarfiles with untar [replace untarfiles with untar](https://github.com/nf-core/smrnaseq/issues/449). +- [[#457]](https://github.com/nf-core/smrnaseq/pull/457) - QC all input [fasta files and clean them](https://github.com/nf-core/smrnaseq/issues/455). ## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch diff --git a/tests/test_contamination_tech_reps.nf.test.snap b/tests/test_contamination_tech_reps.nf.test.snap index 91a343f4..e5d7b706 100644 --- a/tests/test_contamination_tech_reps.nf.test.snap +++ b/tests/test_contamination_tech_reps.nf.test.snap @@ -40,7 +40,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-30T15:53:12.532061" + "timestamp": "2024-10-01T20:06:04.830119436" }, "mirna_quant_bam": { "content": [ @@ -64,10 +64,10 @@ true ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-04T19:24:40.511510374" + "timestamp": "2024-10-01T20:06:04.974546479" }, "mirna_quant_edger_qc": { "content": [ @@ -89,10 +89,10 @@ true ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-04T19:24:40.56322797" + "timestamp": "2024-10-01T20:06:05.025175321" }, "contaminant_filter_filter": { "content": [ @@ -101,10 +101,10 @@ true ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-04T19:24:40.458015808" + "timestamp": "2024-10-01T20:06:04.920520728" }, "mirna_quant_mirtop": { "content": [ @@ -115,6 +115,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T18:06:26.655506797" + "timestamp": "2024-10-01T20:06:05.070939602" } -} +} \ No newline at end of file diff --git a/tests/test_mirgenedb.nf.test.snap b/tests/test_mirgenedb.nf.test.snap index 1aa062d4..db381a2e 100644 --- a/tests/test_mirgenedb.nf.test.snap +++ b/tests/test_mirgenedb.nf.test.snap @@ -22,10 +22,10 @@ "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T18:15:04.369221417" + "timestamp": "2024-10-01T20:13:41.274631809" }, "mirna_quant_bam": { "content": [ diff --git a/tests/test_nextflex.nf.test.snap b/tests/test_nextflex.nf.test.snap index 58017db9..6ca6ca94 100644 --- a/tests/test_nextflex.nf.test.snap +++ b/tests/test_nextflex.nf.test.snap @@ -40,7 +40,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-30T16:10:06.50556" + "timestamp": "2024-10-01T20:16:29.435481425" }, "mirna_quant_bam": { "content": [ @@ -142,4 +142,4 @@ }, "timestamp": "2024-09-20T17:11:24.369706104" } -} +} \ No newline at end of file diff --git a/tests/test_skipfastp.nf.test b/tests/test_skipfastp.nf.test index 43dfd07a..eb4a0456 100644 --- a/tests/test_skipfastp.nf.test +++ b/tests/test_skipfastp.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 63 }, + { assert workflow.trace.succeeded().size() == 64 }, { assert snapshot( path("$outputDir/mirna_quant/mirtop/joined_samples_mirtop.tsv").exists(), diff --git a/tests/test_skipfastp.nf.test.snap b/tests/test_skipfastp.nf.test.snap index b2942031..06f8c346 100644 --- a/tests/test_skipfastp.nf.test.snap +++ b/tests/test_skipfastp.nf.test.snap @@ -30,14 +30,14 @@ "Clone1_N3_mature_hairpin_genome.sorted.idxstats:md5,2620288b88bba1ea3315414016c083a1", "Clone1_N1_mature_hairpin_genome.sorted.idxstats:md5,e0e4a95f5c21a926f7894cf1fbe3110b", "Clone1_N1_mature_hairpin_genome.sorted.flagstat:md5,62208acf0c7418d590b41318d7e17d67", - "Clone1_N3_mature_hairpin_genome.sorted.stats:md5,471571deab69bf7a6f4cb679035398d2", - "Clone1_N1_mature_hairpin_genome.sorted.stats:md5,cce099859e44a7e0adf1c304fdb14f4f" + "Clone1_N3_mature_hairpin_genome.sorted.stats:md5,1c03f98d16d2e418692d3227c8be2803", + "Clone1_N1_mature_hairpin_genome.sorted.stats:md5,e5aee4c4e099d5f4e82bd22f58309e2c" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T15:26:42.738485563" + "timestamp": "2024-10-01T20:19:25.61285999" }, "software_versions": { "content": [ @@ -47,7 +47,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-30T16:13:51.93255" + "timestamp": "2024-10-01T20:19:25.507173099" }, "mirna_quant_bam": { "content": [ @@ -140,6 +140,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T18:29:58.277371938" + "timestamp": "2024-10-01T20:19:25.557700049" } -} +} \ No newline at end of file diff --git a/tests/test_umi.nf.test b/tests/test_umi.nf.test index 58ef4cfa..e2c4cff5 100644 --- a/tests/test_umi.nf.test +++ b/tests/test_umi.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 73 }, + { assert workflow.trace.succeeded().size() == 74 }, { assert snapshot( path("$outputDir/mirna_quant/bam/mature/SRX8195118_SRR11631014_mature.sorted.stats"), diff --git a/tests/test_umi.nf.test.snap b/tests/test_umi.nf.test.snap index 7af606d2..9f485004 100644 --- a/tests/test_umi.nf.test.snap +++ b/tests/test_umi.nf.test.snap @@ -30,14 +30,14 @@ "SRX8195117_SRR11631013_mature_hairpin_genome.sorted.flagstat:md5,977e88cbe62027285df73e1f7f9cd9bc", "SRX8195117_SRR11631013_mature_hairpin_genome.sorted.idxstats:md5,cc0413bf90252c3b3af8926fd64bc873", "SRX8195118_SRR11631014_mature_hairpin_genome.sorted.idxstats:md5,a4874de294706a7ead30258944ff2dad", - "SRX8195118_SRR11631014_mature_hairpin_genome.sorted.stats:md5,89f8dd2287a390cf347627c51b0d2ec2", - "SRX8195117_SRR11631013_mature_hairpin_genome.sorted.stats:md5,910348e352583b44c3f30cfd6332d846" + "SRX8195118_SRR11631014_mature_hairpin_genome.sorted.stats:md5,6beddbebbd48d4ffc01e059c73a5a330", + "SRX8195117_SRR11631013_mature_hairpin_genome.sorted.stats:md5,cc5ef7cbd5a29794170ac6cff59dfd76" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T15:35:36.816602685" + "timestamp": "2024-10-01T20:24:14.961610878" }, "software_versions": { "content": [ @@ -47,7 +47,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-30T16:23:12.559357" + "timestamp": "2024-10-01T20:24:14.901461898" }, "mirna_quant_bam": { "content": [ @@ -160,4 +160,4 @@ }, "timestamp": "2024-09-20T19:12:28.290360163" } -} +} \ No newline at end of file