Skip to content

Commit

Permalink
Merge pull request #388 from nschcolnicov/fix_igenome
Browse files Browse the repository at this point in the history
Updating workflow files to work with Igenome
  • Loading branch information
nschcolnicov authored Aug 28, 2024
2 parents 17c027e + 7d534cd commit 228212c
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp.
- [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile.
- [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add nf-test to local module `blat_mirna` and fixes [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354). Adds a small test profile to test contaminant filter results.
- [[#388]](https://github.com/nf-core/smrnaseq/pull/388) - Fix [igenomes fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix workflow scripts so that they can use iGenomes parameters.
- [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Change `.bai` index for `.csi` index in `samtools_index` to fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132).

## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch
Expand Down
3 changes: 3 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ workflow {
NFCORE_SMRNASEQ (
Channel.of(file(params.input, checkIfExists: true)),
PIPELINE_INITIALISATION.out.samplesheet,
params.fasta,
params.mirtrace_species,
params.bowtie_index,
ch_versions
)

Expand Down
3 changes: 2 additions & 1 deletion modules/local/mirtop_quant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ process MIRTOP_QUANT {
path ("bams/*")
path hairpin
path gtf
val mirtrace_species

output:
path "mirtop/mirtop.gff" , emit: mirtop_gff
Expand All @@ -22,7 +23,7 @@ process MIRTOP_QUANT {
task.ext.when == null || task.ext.when

script:
def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species
def filter_species = params.mirgenedb ? params.mirgenedb_species : mirtrace_species
"""
#Cleanup the GTF if mirbase html form is broken
GTF="$gtf"
Expand Down
3 changes: 2 additions & 1 deletion modules/local/mirtrace.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process MIRTRACE_RUN {
input:
tuple val(adapter), val(ids), path(reads)
path(mirtrace_config)
val mirtrace_species

output:
path "mirtrace/*" , emit: mirtrace
Expand All @@ -30,7 +31,7 @@ process MIRTRACE_RUN {
export mirtracejar=\$(dirname \$(which mirtrace))
java $java_mem -jar \$mirtracejar/mirtrace.jar --mirtrace-wrapper-name mirtrace qc \\
--species $params.mirtrace_species \\
--species $mirtrace_species \\
$protocol \\
--config $mirtrace_config \\
--write-fasta \\
Expand Down
2 changes: 1 addition & 1 deletion modules/local/parse_fasta_mirna.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ process PARSE_FASTA_MIRNA {

input:
tuple val(meta2), path(fasta)
val filter_species

output:
tuple val(meta2), path('*_igenome.fa'), emit: parsed_fasta
Expand All @@ -17,7 +18,6 @@ process PARSE_FASTA_MIRNA {
task.ext.when == null || task.ext.when

script:
def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species
"""
# Uncompress FASTA reference files if necessary
FASTA="$fasta"
Expand Down
19 changes: 10 additions & 9 deletions subworkflows/local/mirna_quant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@ include { EDGER_QC } from '../../modules/local/edger_qc/edger_qc.nf'

workflow MIRNA_QUANT {
take:
mature // channel: [ val(meta), fasta file]
hairpin // channel: [ val(meta), fasta file]
gtf // channle: GTF file
reads // channel: [ val(meta), [ reads ] ]
mature // channel: [ val(meta), fasta file]
hairpin // channel: [ val(meta), fasta file]
gtf // channel: path GTF file
reads // channel: [ val(meta), [ reads ] ]
mirtrace_species // val: params.mirtrace_species

main:
ch_versions = Channel.empty()

PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed }
parse_species_input = params.mirgenedb ? Channel.value(params.mirgenedb_species) : Channel.value(mirtrace_species)
PARSE_MATURE ( mature, parse_species_input ).parsed_fasta.set { mirna_parsed }
ch_versions = ch_versions.mix(PARSE_MATURE.out.versions)

FORMAT_MATURE ( mirna_parsed )
Expand All @@ -58,7 +59,7 @@ workflow MIRNA_QUANT {
BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta )
ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions)

PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed }
PARSE_HAIRPIN ( hairpin, parse_species_input ).parsed_fasta.set { hairpin_parsed }
ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions)

FORMAT_HAIRPIN ( hairpin_parsed )
Expand Down Expand Up @@ -95,8 +96,8 @@ workflow MIRNA_QUANT {
ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions)

ch_mirtop_logs = Channel.empty()
if (params.mirtrace_species){
MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf )
if (mirtrace_species){
MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf, Channel.value(mirtrace_species) )
ch_mirtop_logs = MIRTOP_QUANT.out.logs
ch_versions = ch_versions.mix(MIRTOP_QUANT.out.versions)

Expand Down
6 changes: 4 additions & 2 deletions subworkflows/local/mirtrace.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ include { MIRTRACE_RUN } from '../../modules/local/mirtrace'

workflow MIRTRACE {
take:
reads // channel: [ val(adapterseq), [ val(ids) ], [ path(reads) ] ]
reads // channel: [ val(adapterseq), [ val(ids) ], [ path(reads) ] ]
mirtrace_species // value : params.mirtrace_species

main:

Expand All @@ -20,7 +21,8 @@ workflow MIRTRACE {

MIRTRACE_RUN (
reads,
ch_mirtrace_config
ch_mirtrace_config,
mirtrace_species
)

emit:
Expand Down
42 changes: 23 additions & 19 deletions workflows/smrnaseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,23 @@ ch_fastp_adapters = Channel.fromPath(params.fastp_known_mirn
workflow NFCORE_SMRNASEQ {

take:
ch_input // channel: samplesheet file as specified to --input
ch_samplesheet // channel: sample fastqs parsed from --input
ch_versions // channel: [ path(versions.yml) ]
ch_input // channel: samplesheet file as specified to --input
ch_samplesheet // channel: sample fastqs parsed from --input
val_fasta // params.fasta
val_mirtrace_species // params.mirtrace_species
bowtie_index // params.bowtie_index
ch_versions // channel: [ path(versions.yml) ]

main:
//Config checks
// Check optional parameters
if (!params.mirgenedb && !params.mirtrace_species) {
if (!params.mirgenedb && !val_mirtrace_species) {
exit 1, "Reference species for miRTrace is not defined via the --mirtrace_species parameter."
}

// Genome options
def mirna_gtf_from_species = params.mirtrace_species ? (params.mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3") : false
def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species
mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false
mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species

if (!params.mirgenedb) {
if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" }
Expand Down Expand Up @@ -108,23 +111,23 @@ workflow NFCORE_SMRNASEQ {
)
ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)

ch_fasta = params.fasta ? file(params.fasta): []
ch_fasta = val_fasta ? file(val_fasta): []
ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads

// even if bowtie index is specified, there still needs to be a fasta.
// without fasta, no genome analysis.
if(params.fasta) {
if(val_fasta) {
//Prepare bowtie index, unless specified
//This needs to be done here as the index is used by GENOME_QUANT
if(params.bowtie_index) {
ch_fasta = Channel.fromPath(params.fasta)
ch_fasta = Channel.fromPath(val_fasta)
if (params.bowtie_index.endsWith(".tar.gz")) {
UNTAR_BOWTIE_INDEX ( [ [], params.bowtie_index ]).files.map { it[1] }.set {ch_bowtie_index}
ch_versions = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions)
} else {
Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index }
}
} else {
} else {
INDEX_GENOME ( [ [:], ch_fasta ] )
ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
ch_bowtie_index = INDEX_GENOME.out.index
Expand Down Expand Up @@ -181,8 +184,8 @@ workflow NFCORE_SMRNASEQ {
//
// SUBWORKFLOW: MIRTRACE
//
if (params.mirtrace_species) {
MIRTRACE(ch_mirtrace_inputs)
if (val_mirtrace_species) {
MIRTRACE(ch_mirtrace_inputs, val_mirtrace_species)
ch_versions = ch_versions.mix(MIRTRACE.out.versions)
} else {
log.warn "The parameter --mirtrace_species is absent. MIRTRACE quantification skipped."
Expand All @@ -209,20 +212,21 @@ workflow NFCORE_SMRNASEQ {
ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads

}

// The MIRNA_QUANT subworkflow should still run when mirtrace_species is null, as long as mirgenedb is true
MIRNA_QUANT (
[ [:], reference_mature],
[ [:], reference_hairpin],
mirna_gtf,
ch_reads_for_mirna
[ [:], reference_mature],
[ [:], reference_hairpin],
mirna_gtf,
ch_reads_for_mirna,
val_mirtrace_species
)
ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions)

//
// GENOME
//
genome_stats = Channel.empty()
if (params.fasta){
if (val_fasta){
GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped )
genome_stats = GENOME_QUANT.out.stats
ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
Expand Down Expand Up @@ -306,7 +310,7 @@ workflow NFCORE_SMRNASEQ {
ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.hairpin_stats.collect({it[1]}).ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mirtop_logs.collect().ifEmpty([]))
if (params.mirtrace_species) {
if (val_mirtrace_species) {
ch_multiqc_files = ch_multiqc_files.mix(MIRTRACE.out.results.collect().ifEmpty([]))
}

Expand Down

0 comments on commit 228212c

Please sign in to comment.