Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 477 #481

Merged
merged 4 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Dev vTBD - TBD - TBD

- [[#481]](https://github.com/nf-core/smrnaseq/pull/481) - Fix [MIRTOP_STATS IndexError](https://github.com/nf-core/smrnaseq/issues/477) - Fix mirtop process execution when mirgenedb is used.

## v2.4.0 - 2024-10-14 - Navy Iron Boxer

- [[#349]](https://github.com/nf-core/smrnaseq/pull/349) - Fix [MIRTOP_QUANT conda issue](https://github.com/nf-core/smrnaseq/issues/347) - change conda-base to conda-forge channel.
Expand Down
7 changes: 1 addition & 6 deletions subworkflows/local/mirna_quant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,7 @@ workflow MIRNA_QUANT {

ch_mirtop_logs = Channel.empty()

// nf-core/mirtop

ch_mirna_gtf_species = ch_mirna_gtf.map{ meta,gtf -> gtf }
.combine(ch_mirtrace_species)
.map{ gtf, species -> [ [id:species.toString()], gtf, species ] }
.collect()
ch_mirna_gtf_species = ch_mirna_gtf.map{ meta, gtf-> [ meta, gtf, meta.species ] }.collect()

BAM_STATS_MIRNA_MIRTOP(BOWTIE_MAP_SEQCLUSTER.out.bam, FORMAT_HAIRPIN.out.formatted_fasta, ch_mirna_gtf_species )

Expand Down
33 changes: 28 additions & 5 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ workflow PREPARE_GENOME {
ch_mirtrace_species = val_mirtrace_species ? Channel.value(val_mirtrace_species) : Channel.empty()
mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://raw.githubusercontent.com/nf-core/test-datasets/smrnaseq/reference/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false
ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() )

// Add the species of the gtf to its meta map
ch_mirna_gtf = ch_mirna_gtf
.combine(ch_mirtrace_species.ifEmpty('unknown'))
.map { meta, gtf, species ->
def new_meta = meta.clone() + [species: species]
[new_meta, gtf]
}

ch_mirna_adapters = params.with_umi ? [] : Channel.fromPath(val_fastp_known_mirna_adapters, checkIfExists: true).collect()

ch_rrna = val_rrna ? Channel.fromPath(val_rrna, checkIfExists: true).map{ it -> [ [id:'rRNA'], it ] }.collect() : Channel.empty()
Expand Down Expand Up @@ -118,15 +127,29 @@ workflow PREPARE_GENOME {

// Genome options
if (!params.mirgenedb) {
ch_reference_mature = params.mature ? Channel.fromPath(params.mature, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Mature miRNA fasta file not found: ${params.mature}" }
ch_reference_hairpin = params.hairpin ? Channel.fromPath(params.hairpin, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" }
ch_reference_mature = params.mature ? Channel.fromPath(params.mature, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Specify a Mature miRNA fasta file via '--mature'" }
ch_reference_hairpin = params.hairpin ? Channel.fromPath(params.hairpin, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Specify a Hairpin miRNA fasta file via '--hairpin'" }
} else {
if (!params.mirgenedb_species) {
exit 1, "MirGeneDB species not set, please specify via the --mirgenedb_species parameter"
}
ch_reference_mature = params.mirgenedb_mature ? Channel.fromPath(params.mirgenedb_mature, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Mature miRNA fasta file not found via --mirgenedb_mature: ${params.mirgenedb_mature}" }
ch_reference_hairpin = params.mirgenedb_hairpin ? Channel.fromPath(params.mirgenedb_hairpin, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Hairpin miRNA fasta file not found via --mirgenedb_hairpin: ${params.mirgenedb_hairpin}" }
ch_mirna_gtf = params.mirgenedb_gff ? Channel.fromPath(params.mirgenedb_gff, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "MirGeneDB gff file not found via --mirgenedb_gff: ${params.mirgenedb_gff}"}
ch_reference_mature = params.mirgenedb_mature ? Channel.fromPath(params.mirgenedb_mature, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Specify a mirgenedb Mature miRNA fasta file via '--mirgenedb_mature'" }
ch_reference_hairpin = params.mirgenedb_hairpin ? Channel.fromPath(params.mirgenedb_hairpin, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Specify a mirgenedb Hairpin miRNA fasta file via '--mirgenedb_hairpin'" }
ch_mirna_gtf = params.mirgenedb_gff ? Channel.fromPath(params.mirgenedb_gff, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Specify a MirGeneDB gff file via '--mirgenedb_gff'"}

// Create a channel for mirgenedb_species
ch_mirgenedb_species = Channel.value(params.mirgenedb_species)

// Add the species of the gtf to its meta map.
// When the mirgenedb workflow is not enabled, the species defaults to val_mirtrace_species (or 'unknown' if it is not set).
// When the mirgenedb workflow parameters are provided, params.mirgenedb_species is used instead.
// If both the mirgenedb workflow parameters and mirtrace_species (or mirna_gtf) are provided, params.mirgenedb_species takes precedence as the species value.
ch_mirna_gtf = ch_mirna_gtf
.combine(ch_mirgenedb_species)
.map { meta, gtf, species ->
def new_meta = meta.clone() + [species: species]
[new_meta, gtf]
}
}

emit:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_mirgenedb.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ nextflow_pipeline {
assertAll(
{ assert workflow.success },
{ assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
{ assert workflow.trace.succeeded().size() == 90 },
{ assert workflow.trace.succeeded().size() == 104 },
{ assert workflow.trace.failed().size() == 1 },

{ assert snapshot(
Expand Down
60 changes: 30 additions & 30 deletions tests/test_mirgenedb.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
{
"genome_quant_bam": {
"content": [
"Clone9_N1_mature_hairpin_genome.sorted.stats:md5,6b1d2b924593096358494dda37b46770",
"Clone9_N1_mature_hairpin_genome.sorted.idxstats:md5,aa37c5da7c2b4505ce58c3a21f97121c",
"Clone1_N1_mature_hairpin_genome.sorted.stats:md5,3c5f51cd7136eed5e97847ad7b857d23",
"Control_N1_mature_hairpin_genome.sorted.stats:md5,a7f2dd17a34c8f0b669a774404247394",
"Clone1_N1_mature_hairpin_genome.sorted.flagstat:md5,5bb521c495f1c450835299b1eb88dc84",
"Clone9_N1_mature_hairpin_genome.sorted.flagstat:md5,6a8ad3be2ca0fa924fd32a04293d4ce4",
"Clone1_N1_mature_hairpin_genome.sorted.idxstats:md5,d92f9eae7657418858e6d2b69436f74f",
"Control_N1_mature_hairpin_genome.sorted.idxstats:md5,a11f543771cea6b383fb596f60e998c3",
"Control_N1_mature_hairpin_genome.sorted.flagstat:md5,df2a57ac3b36f5d40793d3105a4bb2d1"
"Clone9_N1_mature_hairpin_genome.sorted.stats:md5,e81a1ccd658e49ae3e30a26f6d7ffa07",
"Clone9_N1_mature_hairpin_genome.sorted.idxstats:md5,8442f0f892abec766eb448b83311f46e",
"Clone1_N1_mature_hairpin_genome.sorted.stats:md5,951f3a5e3177dcba8aa264d8d5f6fa65",
"Control_N1_mature_hairpin_genome.sorted.stats:md5,d7d5f8f404858a052dc03b9b6f769248",
"Clone1_N1_mature_hairpin_genome.sorted.flagstat:md5,6971caaa71c465e63e844460f0a7d023",
"Clone9_N1_mature_hairpin_genome.sorted.flagstat:md5,82126b512243a091f7ddc94fa9dafb1d",
"Clone1_N1_mature_hairpin_genome.sorted.idxstats:md5,a2c5c718ff07f4bddcd216798b7c405a",
"Control_N1_mature_hairpin_genome.sorted.idxstats:md5,85d1e77dc9a3a0a3b173ba7db3fca34b",
"Control_N1_mature_hairpin_genome.sorted.flagstat:md5,81b7cd0fc5d8949a36089d2e6286338f"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nextflow": "24.10.0"
},
"timestamp": "2024-10-08T23:23:13.57712901"
"timestamp": "2024-11-11T13:44:14.743099344"
},
"software_versions": {
"content": [
"{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
"{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CSVTK_JOIN={csvtk=0.30.0}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.4.0}}"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nextflow": "24.10.0"
},
"timestamp": "2024-10-08T23:23:13.407922799"
"timestamp": "2024-11-11T13:44:14.583324793"
},
"mirna_quant_bam": {
"content": [
Expand All @@ -38,21 +38,21 @@
"Clone9_N1_mature.sorted.stats:md5,873e4f40e377cc445ace1ac48354729d",
"Control_N1_mature.sorted.idxstats:md5,b7a382b1d0f5cba6cb94b3b5a6b18f84",
true,
"Control_N1_mature_hairpin.sorted.idxstats:md5,79dc5e82ff88e7379c893549224cd87f",
"Control_N1_mature_hairpin.sorted.flagstat:md5,1dc7b98f0014a99a20de7c09a6b95340",
"Clone9_N1_mature_hairpin.sorted.idxstats:md5,f3ed5bf23f73d41c42d3da0bf30f89ea",
"Clone9_N1_mature_hairpin.sorted.stats:md5,c306ef4c5b1e23a3d032b532cf916fc1",
"Control_N1_mature_hairpin.sorted.idxstats:md5,25305882d997c5801388c5c881518296",
"Control_N1_mature_hairpin.sorted.flagstat:md5,569a6748d38176a240c351ba8b30eca8",
"Clone9_N1_mature_hairpin.sorted.idxstats:md5,265696d182c70f57bd7be26971def403",
"Clone9_N1_mature_hairpin.sorted.stats:md5,30266af34f3df2b8d807a1e620921ee6",
true,
true,
true,
"Control_N1_mature_hairpin.sorted.stats:md5,3e82fa30bfafcab2e8fb2f247e591959",
"Clone9_N1_mature_hairpin.sorted.flagstat:md5,678f4f9e98c3e1fcc5af54e8dd06fbbc"
"Control_N1_mature_hairpin.sorted.stats:md5,3bf3fdb23ef91c933887c71ad603999a",
"Clone9_N1_mature_hairpin.sorted.flagstat:md5,5729505d4df901b0c0cd93ab330526b5"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nextflow": "24.10.0"
},
"timestamp": "2024-10-08T23:23:13.503940378"
"timestamp": "2024-11-11T13:44:14.657194144"
},
"mirdeep2": {
"content": [
Expand Down Expand Up @@ -102,13 +102,13 @@
true,
"fastqc-1_sequence_counts_plot.txt:md5,59faee895ea86c12a4124d417e3bbd63",
"fastqc-1_per_sequence_gc_content_plot_Percentages.txt:md5,0a4b4285f2c53dca216c107decc9921f",
"multiqc_citations.txt:md5,57db2426be011862828d18f767d25b57",
"samtools-stats-dp.txt:md5,45c0315bade3f07942ded1ead37c1489",
"multiqc_citations.txt:md5,ea6d63393b7f47815a949fc58ee0caf8",
"samtools-stats-dp.txt:md5,c34196013e37d0c48671dbb70055c228",
"fastqc_sequence_length_distribution_plot.txt:md5,8b5cf1e3429a1ea0b3c63cfb176e1014",
"fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a0502dd4f701c9deb646ffbec80c09de",
"fastqc-1_sequence_duplication_levels_plot.txt:md5,2072cda513c8884047d9d11c8aacbf33",
"fastqc-1_per_base_sequence_quality_plot.txt:md5,cafad80f4e07df53590cbabbbd024629",
"multiqc_general_stats.txt:md5,950d3fb06c211e984084e6de9dad6bb3",
"multiqc_general_stats.txt:md5,7064887136896292880f7ed09d256225",
"fastqc-1_per_base_n_content_plot.txt:md5,a0502dd4f701c9deb646ffbec80c09de",
"fastqc_per_base_n_content_plot.txt:md5,d907ac1ac9a4f19908b7b025eb75abfe",
"fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,0742b9813dcc95d4c62c52c83dec390c",
Expand All @@ -119,7 +119,7 @@
"fastqc-1-status-check-heatmap.txt:md5,d9c3ce24536a948e1fe9b84c55421ab7",
"fastqc_sequence_counts_plot.txt:md5,4861f0dc120e57e0359c53f417756b0c",
"fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,e9d8e3289f84f5a1ae6775813ec5a9b4",
"samtools_alignment_plot.txt:md5,b841ffce110bde994ccc6e977d2f856e",
"samtools_alignment_plot.txt:md5,3cc8b10a9d2a2317d7ac769d56bf5eb3",
"fastqc_per_base_sequence_quality_plot.txt:md5,a8adbff96d9adb317079e6becd7a80f6",
"fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a0502dd4f701c9deb646ffbec80c09de",
"fastqc_adapter_content_plot.txt:md5,bd0fdc9c856c55598976b5a46c23a677",
Expand All @@ -133,10 +133,10 @@
"fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,585ec288b2514de54e8fb6251d1e0f98"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
"nf-test": "0.9.0",
"nextflow": "24.10.0"
},
"timestamp": "2024-09-19T03:58:28.495279269"
"timestamp": "2024-11-11T13:44:14.815270775"
},
"multiqc": {
"content": [
Expand All @@ -148,4 +148,4 @@
},
"timestamp": "2024-08-30T20:30:51.144222162"
}
}
}
Loading