From 84c42d4df3a057545e63062565820f75dc350f01 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 08:16:36 +0000 Subject: [PATCH 01/10] fix index ref and fasta init --- workflows/smrnaseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 21533ec6..2527f98e 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -153,7 +153,7 @@ workflow SMRNASEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - ch_fasta = params.fasta ? file(params.fasta): [] + ch_fasta = params.fasta ? Channel.value(file(params.fasta)) : [] ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads // even if bowtie index is specified, there still needs to be a fasta. @@ -162,7 +162,7 @@ workflow SMRNASEQ { //Prepare bowtie index, unless specified //This needs to be done here as the index is used by GENOME_QUANT if(params.bowtie_index) { - ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } + Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${index}" }.set { ch_bowtie_index } } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 14e08c17ba98a748d95dd056a75ad7b9559d5cac Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 08:35:41 +0000 Subject: [PATCH 02/10] revert fasta channel change --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 2527f98e..9b3d66aa 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -153,7 +153,7 @@ workflow SMRNASEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - ch_fasta = params.fasta ? Channel.value(file(params.fasta)) : [] + ch_fasta = params.fasta ? 
file(params.fasta) : [] ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads // even if bowtie index is specified, there still needs to be a fasta. From 0bf058afc02ee7cc7bb242616063b01072d28cfe Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 09:13:40 +0000 Subject: [PATCH 03/10] use main smrnaseq test data branch for test --- conf/test_no_genome.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/test_no_genome.config b/conf/test_no_genome.config index 485870de..aae8ce91 100644 --- a/conf/test_no_genome.config +++ b/conf/test_no_genome.config @@ -21,9 +21,9 @@ params { // Input data input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' - mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/mature.fa' - hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hairpin.fa' - mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hsa.gff3' + mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' + hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' + mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' mirtrace_species = 'hsa' skip_mirdeep = true protocol = 'illumina' From fff13659c2d63d8f272fc75feed771b075d9eff1 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 09:19:05 +0000 Subject: [PATCH 04/10] add test profile for testing provided bowtie index --- .github/workflows/ci.yml | 1 + conf/test_index.config | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 conf/test_index.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 903f1b03..55ae6171 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,7 @@ jobs: - "test" - "test_no_genome" - "test_umi" + - 
"test_index" steps: - name: Check out pipeline code uses: actions/checkout@v4 diff --git a/conf/test_index.config b/conf/test_index.config new file mode 100644 index 00000000..64ae5766 --- /dev/null +++ b/conf/test_index.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test_index,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test index profile' + config_profile_description = 'Minimal test dataset to check pipeline function with bowtie index' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' + bowtie_index = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/' + + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + save_merged = false + save_aligned_mirna_quant = false + + cleanup = true //Otherwise tests dont run through properly. 
+} From c951cc1b46af51deae6e4e26a8a2e9a42e8f020c Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 09:31:48 +0000 Subject: [PATCH 05/10] add new test profile to config --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 8af960f8..0f802b0b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -233,6 +233,7 @@ profiles { test_umi { includeConfig 'conf/test_umi.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } + test_index { includeConfig 'conf/test_index.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 0074162b4ff14c1f3160e871803e922270afc1f1 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 16 Feb 2024 09:42:56 +0000 Subject: [PATCH 06/10] replace index variable --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 9b3d66aa..c509d59c 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -162,7 +162,7 @@ workflow SMRNASEQ { //Prepare bowtie index, unless specified //This needs to be done here as the index is used by GENOME_QUANT if(params.bowtie_index) { - Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${index}" }.set { ch_bowtie_index } + Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.set { ch_bowtie_index } } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 134d266f8848248ca21d53997defc8fa035874bb Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Mon, 19 Feb 2024 15:41:25 +0000 Subject: [PATCH 07/10] initialize fasta channel depending on bowtie_index --- workflows/smrnaseq.nf | 3 ++- 1 file changed, 2 insertions(+), 
1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index c509d59c..d86b0280 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -162,7 +162,8 @@ workflow SMRNASEQ { //Prepare bowtie index, unless specified //This needs to be done here as the index is used by GENOME_QUANT if(params.bowtie_index) { - Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.set { ch_bowtie_index } + Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index } + ch_fasta = Channel.fromPath(params.fasta) } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From b27b98b6207bb33f48f0fe31826edfeb967b9fed Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Wed, 21 Feb 2024 11:04:43 +0000 Subject: [PATCH 08/10] add functionality to process archived bowtie index --- modules.json | 5 +++ modules/nf-core/untarfiles/environment.yml | 9 ++++ modules/nf-core/untarfiles/main.nf | 52 ++++++++++++++++++++++ modules/nf-core/untarfiles/meta.yml | 48 ++++++++++++++++++++ workflows/smrnaseq.nf | 9 +++- 5 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/untarfiles/environment.yml create mode 100644 modules/nf-core/untarfiles/main.nf create mode 100644 modules/nf-core/untarfiles/meta.yml diff --git a/modules.json b/modules.json index b7261f61..bdad7a91 100644 --- a/modules.json +++ b/modules.json @@ -69,6 +69,11 @@ "branch": "master", "git_sha": "9e56d7a647fbf6f7e45ef123bc916ad66b6f7c9d", "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + }, + "untarfiles": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/untarfiles/environment.yml 
b/modules/nf-core/untarfiles/environment.yml new file mode 100644 index 00000000..e479f80d --- /dev/null +++ b/modules/nf-core/untarfiles/environment.yml @@ -0,0 +1,9 @@ +name: untarfiles +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 + - bioconda::grep=3.4 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untarfiles/main.nf b/modules/nf-core/untarfiles/main.nf new file mode 100644 index 00000000..de27e67c --- /dev/null +++ b/modules/nf-core/untarfiles/main.nf @@ -0,0 +1,52 @@ +process UNTARFILES { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/**") , emit: files + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untarfiles/meta.yml b/modules/nf-core/untarfiles/meta.yml new file mode 100644 index 00000000..38108826 --- /dev/null +++ b/modules/nf-core/untarfiles/meta.yml @@ -0,0 +1,48 @@ +name: untarfiles +description: Extract files. +keywords: + - untar + - uncompress + - files +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - files: + type: string + description: A list containing references to individual archive files + pattern: "*/**" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" + - "@pinin4fjords" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" + - "@pinin4fjords" diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index d86b0280..8c998cdb 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -67,6 +67,7 @@ if (!params.mirgenedb) { include { INPUT_CHECK } from '../subworkflows/local/input_check' include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' include { FASTP as FASTP_LENGTH_FILTER } from '../modules/nf-core/fastp' +include { UNTARFILES as UNTAR_BOWTIE_INDEX } from '../modules/nf-core/untarfiles' include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' include { GENOME_QUANT } from '../subworkflows/local/genome_quant' @@ -162,8 +163,14 @@ workflow SMRNASEQ { //Prepare bowtie index, unless specified //This needs to be done here as the index is used by GENOME_QUANT if(params.bowtie_index) { - Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index } ch_fasta = Channel.fromPath(params.fasta) + if (params.bowtie_index.endsWith(".tar.gz")) { + UNTAR_BOWTIE_INDEX ( [ [], params.bowtie_index ]).files.map { it[1] }.set {ch_bowtie_index} + ch_versions = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions) + } else { + Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index } + 
}Channel + .fromPath( } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 6e5888cb256ba597f1d7ce2594799e7cdbee6594 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Wed, 21 Feb 2024 11:06:07 +0000 Subject: [PATCH 09/10] adjust test config with bowtie index --- conf/test_index.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_index.config b/conf/test_index.config index 64ae5766..bb9f4707 100644 --- a/conf/test_index.config +++ b/conf/test_index.config @@ -23,7 +23,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - bowtie_index = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/' + bowtie_index = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/bowtie_index.tar.gz' mirtrace_species = 'hsa' protocol = 'illumina' From bf78343b0753f0a5dde1171174e05f01331d457a Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Wed, 21 Feb 2024 11:16:13 +0000 Subject: [PATCH 10/10] fix syntax error --- workflows/smrnaseq.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 8c998cdb..39e402f8 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -169,8 +169,7 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions) } else { Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index } - }Channel - .fromPath( + } } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)