diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 903f1b03..55ae6171 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,7 @@ jobs:
           - "test"
           - "test_no_genome"
           - "test_umi"
+          - "test_index"
     steps:
       - name: Check out pipeline code
        uses: actions/checkout@v4
diff --git a/conf/test_index.config b/conf/test_index.config
new file mode 100644
index 00000000..bb9f4707
--- /dev/null
+++ b/conf/test_index.config
@@ -0,0 +1,35 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/smrnaseq -profile test_index,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Test index profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function with bowtie index'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = '6.GB'
+    max_time = '6.h'
+
+    // Input data
+
+    input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
+    fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'
+    bowtie_index = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/bowtie_index.tar.gz'
+
+    mirtrace_species = 'hsa'
+    protocol = 'illumina'
+    skip_mirdeep = true
+    save_merged = false
+    save_aligned_mirna_quant = false
+
+    cleanup = true // Otherwise tests don't run through properly.
+}
diff --git a/conf/test_no_genome.config b/conf/test_no_genome.config
index 485870de..aae8ce91 100644
--- a/conf/test_no_genome.config
+++ b/conf/test_no_genome.config
@@ -21,9 +21,9 @@ params {
 
     // Input data
     input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
-    mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/mature.fa'
-    hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hairpin.fa'
-    mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hsa.gff3'
+    mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa'
+    hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa'
+    mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3'
     mirtrace_species = 'hsa'
     skip_mirdeep = true
     protocol = 'illumina'
diff --git a/modules.json b/modules.json
index b7261f61..bdad7a91 100644
--- a/modules.json
+++ b/modules.json
@@ -69,6 +69,11 @@
                         "branch": "master",
                         "git_sha": "9e56d7a647fbf6f7e45ef123bc916ad66b6f7c9d",
                         "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+                    },
+                    "untarfiles": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/untarfiles/environment.yml b/modules/nf-core/untarfiles/environment.yml
new file mode 100644
index 00000000..e479f80d
--- /dev/null
+++ b/modules/nf-core/untarfiles/environment.yml
@@ -0,0 +1,9 @@
+name: untarfiles
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
+  - bioconda::grep=3.4
+  - conda-forge::tar=1.34
diff --git a/modules/nf-core/untarfiles/main.nf b/modules/nf-core/untarfiles/main.nf
new file mode 100644
index 00000000..de27e67c
--- /dev/null
+++ b/modules/nf-core/untarfiles/main.nf
@@ -0,0 +1,52 @@
+process UNTARFILES {
+    tag "$archive"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("${prefix}/**") , emit: files
+    path "versions.yml"                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, ""))
+
+    """
+    mkdir $prefix
+
+    tar \\
+        -C $prefix \\
+        -xavf \\
+        $args \\
+        $archive \\
+        $args2
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir $prefix
+    touch ${prefix}/file.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/untarfiles/meta.yml b/modules/nf-core/untarfiles/meta.yml
new file mode 100644
index 00000000..38108826
--- /dev/null
+++ b/modules/nf-core/untarfiles/meta.yml
@@ -0,0 +1,48 @@
+name: untarfiles
+description: Extract files.
+keywords:
+  - untar
+  - uncompress
+  - files
+tools:
+  - untar:
+      description: |
+        Extract tar.gz files.
+      documentation: https://www.gnu.org/software/tar/manual/
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: File to be untarred
+      pattern: "*.{tar}.{gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files:
+      type: string
+      description: A list containing references to individual archive files
+      pattern: "*/**"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
+  - "@pinin4fjords"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
+  - "@pinin4fjords"
diff --git a/nextflow.config b/nextflow.config
index 8af960f8..0f802b0b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -233,6 +233,7 @@ profiles {
     test_umi       { includeConfig 'conf/test_umi.config' }
     test_no_genome { includeConfig 'conf/test_no_genome.config' }
     test_full      { includeConfig 'conf/test_full.config' }
+    test_index     { includeConfig 'conf/test_index.config' }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 21533ec6..39e402f8 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -67,6 +67,7 @@ if (!params.mirgenedb) {
 include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
 include { FASTP as FASTP_LENGTH_FILTER } from '../modules/nf-core/fastp'
+include { UNTARFILES as UNTAR_BOWTIE_INDEX } from '../modules/nf-core/untarfiles'
 include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter'
 include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant'
 include { GENOME_QUANT } from '../subworkflows/local/genome_quant'
@@ -153,7 +154,7 @@ workflow SMRNASEQ {
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    ch_fasta = params.fasta ? file(params.fasta): []
+    ch_fasta = params.fasta ? file(params.fasta) : []
     ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     // even if bowtie index is specified, there still needs to be a fasta.
@@ -162,7 +163,13 @@
     //Prepare bowtie index, unless specified
     //This needs to be done here as the index is used by GENOME_QUANT
     if(params.bowtie_index) {
-        ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" }
+        ch_fasta = Channel.fromPath(params.fasta)
+        if (params.bowtie_index.endsWith(".tar.gz")) {
+            UNTAR_BOWTIE_INDEX ( [ [], params.bowtie_index ] ).files.map { it[1] }.set { ch_bowtie_index }
+            ch_versions = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions)
+        } else {
+            Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index }
+        }
     } else {
         INDEX_GENOME ( [ [:], ch_fasta ] )
         ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
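
A minimal sketch of how the new test_index profile can be exercised locally, assuming Nextflow and a container engine such as Docker are available; the --outdir value below is only illustrative:

    nextflow run nf-core/smrnaseq -profile test_index,docker --outdir ./results_test_index

Because conf/test_index.config points params.bowtie_index at a .tar.gz archive, this run takes the new UNTAR_BOWTIE_INDEX branch added in workflows/smrnaseq.nf; pointing --bowtie_index at an already-extracted directory of *.ebwt files would instead exercise the Channel.fromPath branch.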