diff --git a/CHANGELOG.md b/CHANGELOG.md index b60a43db..0d4adda9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## CHAMPAGNE development version - The CHAMPAGNE nextflow workflow now has a version entry in `nextflow.config`, in compliance with nf-core. (#213, @kelly-sovacool) +- Pool input (control) reads of the same sample name by default. Any inputs that should not be pooled must have different sample names in the samplesheet. (#214, @kelly-sovacool) ## CHAMPAGNE 0.4.0 diff --git a/assets/samplesheet_human.csv b/assets/samplesheet_human.csv index 28ce2eca..96f00fc2 100644 --- a/assets/samplesheet_human.csv +++ b/assets/samplesheet_human.csv @@ -1,13 +1,13 @@ fastq_1,fastq_2,sample,rep,antibody,control -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129678.fastq.gz,,A549_CTCF,1,CTCF,A549_CTCF_INPUT_1 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129676.fastq.gz,,A549_CTCF,2,CTCF,A549_CTCF_INPUT_2 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129677.fastq.gz,,A549_CTCF,3,CTCF,A549_CTCF_INPUT_3 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129560.fastq.gz,,A549_CTCF_INPUT_1,,, -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129561.fastq.gz,,A549_CTCF_INPUT_2,,, -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129562.fastq.gz,,A549_CTCF_INPUT_3,,, -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636612.fastq.gz,,A549_JUN,1,JUN,A549_JUN_INPUT_1 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636613.fastq.gz,,A549_JUN,2,JUN,A549_JUN_INPUT_2 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636614.fastq.gz,,A549_JUN,3,JUN,A549_JUN_INPUT_2 -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638304.fastq.gz,,A549_JUN_INPUT_1,,, -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638305.fastq.gz,,A549_JUN_INPUT_2,,, -/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638306.fastq.gz,,A549_JUN_INPUT_3,,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129678.fastq.gz,,A549_CTCF,1,CTCF,A549_CTCF_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129676.fastq.gz,,A549_CTCF,2,CTCF,A549_CTCF_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129677.fastq.gz,,A549_CTCF,3,CTCF,A549_CTCF_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129560.fastq.gz,,A549_CTCF_INPUT,1,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129561.fastq.gz,,A549_CTCF_INPUT,2,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR5129562.fastq.gz,,A549_CTCF_INPUT,3,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636612.fastq.gz,,A549_JUN,1,JUN,A549_JUN_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636613.fastq.gz,,A549_JUN,2,JUN,A549_JUN_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14636614.fastq.gz,,A549_JUN,3,JUN,A549_JUN_INPUT +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638304.fastq.gz,,A549_JUN_INPUT,1,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638305.fastq.gz,,A549_JUN_INPUT,2,, +/data/CCBR_Pipeliner/testdata/CHAMPAGNE/human/SRR14638306.fastq.gz,,A549_JUN_INPUT,3,, diff --git a/assets/samplesheet_test.csv b/assets/samplesheet_test.csv index bf590be8..4d28e030 100644 --- a/assets/samplesheet_test.csv +++ b/assets/samplesheet_test.csv @@ -1,7 +1,7 @@ sample,rep,fastq_1,fastq_2,antibody,control -SPT5_T0,1,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,SPT5,SPT5_INPUT_1 -SPT5_T0,2,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,,SPT5,SPT5_INPUT_2 -SPT5_T15,1,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,SPT5,SPT5_INPUT_1 -SPT5_T15,2,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,,SPT5,SPT5_INPUT_2 -SPT5_INPUT_1,,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, -SPT5_INPUT_2,,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,,, +SPT5_T0,1,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,SPT5,SPT5_INPUT +SPT5_T0,2,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,,SPT5,SPT5_INPUT +SPT5_T15,1,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,SPT5,SPT5_INPUT +SPT5_T15,2,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,,SPT5,SPT5_INPUT +SPT5_INPUT,1,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, +SPT5_INPUT,2,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index f1b9edcd..628643cd 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """ -source: https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/bin/check_samplesheet.py +adapted from: https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/bin/check_samplesheet.py """ - +import collections import os import errno import argparse @@ -52,6 +52,7 @@ def check_samplesheet(file_in, file_out): """ sample_mapping_dict = {} + input_dict = collections.defaultdict(list) with open(file_in, "r", encoding="utf-8-sig") as fin: ## Check header MIN_COLS = 2 @@ -144,6 +145,7 @@ def check_samplesheet(file_in, file_out): "Line", line, ) + is_control_input = not antibody and not control ## Auto-detect paired-end/single-end if not sample or not fastq_1: @@ -172,7 +174,9 @@ def check_samplesheet(file_in, file_out): print_error("Samplesheet contains duplicate rows!", "Line", line) else: sample_mapping_dict[sample].append(sample_info) - # pprint.pprint(sample_mapping_dict) + if is_control_input: + input_dict[sample_basename].append(sample_info) + ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: out_dir = os.path.dirname(file_out) @@ -205,11 +209,12 @@ def check_samplesheet(file_in, file_out): sample, ) + # check that the control/input exists for idx, val in enumerate(sample_mapping_dict[sample]): control = val[-1] - if control and control not in sample_mapping_dict.keys(): + if control and control not in input_dict.keys(): print_error( - f"Control identifier has to match a provided sample identifier!", + "Control identifier has to match a provided sample identifier!", "Control", control, ) diff --git a/main.nf b/main.nf index 015f0e2c..3f79ddcd 100644 --- a/main.nf +++ b/main.nf @@ -21,6 +21,7 @@ log.info """\ include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS as DOWNLOAD_FASTQ } from './subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools' include { INPUT_CHECK } from './subworkflows/local/input_check.nf' include { PREPARE_GENOME } from './subworkflows/local/prepare_genome.nf' +include { POOL_INPUTS } from './subworkflows/local/pool_inputs/' include { FILTER_BLACKLIST } from './subworkflows/CCBR/filter_blacklist/' include { ALIGN_GENOME } from "./subworkflows/local/align.nf" include { DEDUPLICATE } from "./subworkflows/local/deduplicate.nf" @@ -74,28 +75,27 @@ workflow CHIPSEQ { INPUT_CHECK(file(params.input, checkIfExists: true), params.seq_center, contrast_sheet) INPUT_CHECK.out.reads.set { raw_fastqs } - raw_fastqs | CUTADAPT - CUTADAPT.out.reads.set{ trimmed_fastqs } + CUTADAPT(raw_fastqs).reads | POOL_INPUTS + trimmed_fastqs = POOL_INPUTS.out.reads PREPARE_GENOME() chrom_sizes = PREPARE_GENOME.out.chrom_sizes - effective_genome_size = PREPARE_GENOME.out.effective_genome_size + FILTER_BLACKLIST(trimmed_fastqs, PREPARE_GENOME.out.blacklist_index) ALIGN_GENOME(FILTER_BLACKLIST.out.reads, PREPARE_GENOME.out.reference_index) - ALIGN_GENOME.out.bam.set{ aligned_bam } + aligned_bam = ALIGN_GENOME.out.bam DEDUPLICATE(aligned_bam, chrom_sizes, effective_genome_size) - DEDUPLICATE.out.bam.set{ deduped_bam } - DEDUPLICATE.out.tag_align.set{ deduped_tagalign } + deduped_bam = DEDUPLICATE.out.bam + deduped_tagalign = DEDUPLICATE.out.tag_align - deduped_bam | PHANTOM_PEAKS - PHANTOM_PEAKS.out.fraglen | PPQT_PROCESS - PPQT_PROCESS.out.fraglen.set { frag_lengths } + PHANTOM_PEAKS(deduped_bam).fraglen | PPQT_PROCESS + frag_lengths = PPQT_PROCESS.out.fraglen ch_multiqc = Channel.of() if (params.run.qc) { - QC(raw_fastqs, trimmed_fastqs, FILTER_BLACKLIST.out.n_surviving_reads, + QC(raw_fastqs, CUTADAPT.out.reads, FILTER_BLACKLIST.out.n_surviving_reads, aligned_bam, ALIGN_GENOME.out.aligned_flagstat, ALIGN_GENOME.out.filtered_flagstat, deduped_bam, DEDUPLICATE.out.flagstat, PHANTOM_PEAKS.out.spp, frag_lengths, @@ -157,6 +157,7 @@ workflow CHIPSEQ { ) } + } if (!workflow.stubRun) { diff --git a/modules.json b/modules.json index 229c6a0f..90f9ec4e 100644 --- a/modules.json +++ b/modules.json @@ -100,6 +100,11 @@ "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "custom/sratoolsncbisettings": { "branch": "master", "git_sha": "20e78a9868eaa69c8cac91152397def32374b807", diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 00000000..9b01c865 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..2862c64c --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..81778a06 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..9cb16178 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..e2381ca2 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/nextflow.config b/nextflow.config index 1bcb5939..4b7508de 100644 --- a/nextflow.config +++ b/nextflow.config @@ -152,12 +152,10 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') dag { enabled = true - overwrite = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.png" } report { enabled = true - overwrite = true file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf index 4c11dc7e..83a29345 100644 --- a/subworkflows/CCBR/filter_blacklist/main.nf +++ b/subworkflows/CCBR/filter_blacklist/main.nf @@ -1,6 +1,6 @@ -include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' +include { BWA_MEM as BWA_MEM_BLACKLIST } from '../../../modules/CCBR/bwa/mem' include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned' include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq' include { CUSTOM_COUNTFASTQ } from '../../../modules/CCBR/custom/countfastq' @@ -13,13 +13,13 @@ workflow FILTER_BLACKLIST { main: ch_versions = Channel.empty() - BWA_MEM ( ch_fastq_input, ch_blacklist_index ) - SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam ) + BWA_MEM_BLACKLIST ( ch_fastq_input, ch_blacklist_index ) + SAMTOOLS_FILTERALIGNED( BWA_MEM_BLACKLIST.out.bam ) PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam ) CUSTOM_COUNTFASTQ( PICARD_SAMTOFASTQ.out.paired ) ch_versions = ch_versions.mix( - BWA_MEM.out.versions, + BWA_MEM_BLACKLIST.out.versions, SAMTOOLS_FILTERALIGNED.out.versions, PICARD_SAMTOFASTQ.out.versions, CUSTOM_COUNTFASTQ.out.versions diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index e0a0385c..fcf75b94 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -1,4 +1,4 @@ -include { BWA_MEM } from "../../modules/CCBR/bwa/mem" +include { BWA_MEM as BWA_MEM_REF } from "../../modules/CCBR/bwa/mem" include { FILTER_QUALITY } from "../../modules/local/align.nf" include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_ALIGN SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_FILTER } from '../../modules/CCBR/samtools/flagstat' @@ -11,14 +11,14 @@ workflow ALIGN_GENOME { reference main: - BWA_MEM(reads, reference) - SAMTOOLS_FLAGSTAT_ALIGN( BWA_MEM.out.bam ) - FILTER_QUALITY( BWA_MEM.out.bam ) + BWA_MEM_REF(reads, reference) + SAMTOOLS_FLAGSTAT_ALIGN( BWA_MEM_REF.out.bam ) + FILTER_QUALITY( BWA_MEM_REF.out.bam ) SAMTOOLS_SORT( FILTER_QUALITY.out.bam ) SAMTOOLS_FLAGSTAT_FILTER( SAMTOOLS_SORT.out.bam ) ch_versions = Channel.empty().mix( - BWA_MEM.out.versions, + BWA_MEM_REF.out.versions, SAMTOOLS_FLAGSTAT_ALIGN.out.versions ) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 300913f9..71c73b99 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -50,6 +50,7 @@ def create_fastq_channel(LinkedHashMap row, String seq_center) { meta.single_end = row.single_end.toBoolean() meta.antibody = row.antibody meta.control = row.control + meta.is_input = row.control == '' // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] diff --git a/subworkflows/local/pool_inputs/main.nf b/subworkflows/local/pool_inputs/main.nf new file mode 100644 index 00000000..dc17ca67 --- /dev/null +++ b/subworkflows/local/pool_inputs/main.nf @@ -0,0 +1,28 @@ + +include { CAT_CAT as CONCAT_INPUTS } from "../../../modules/nf-core/cat/cat" + +workflow POOL_INPUTS { + take: + ch_reads + main: + ch_reads.branch { meta, reads -> + input: meta.is_input + samples: !meta.is_input + } + .set{ fastqs_branched } + + fastqs_branched.input.map { meta, reads -> + [ meta.sample_basename, meta, reads ] + } + .groupTuple() + .map{ sample_basename, metas, reads -> + [ [id: sample_basename, sample_basename: sample_basename, rep: '', single_end: metas[0].single_end, antibody: '', control: '', is_input: true], reads.flatten() ]} + | CONCAT_INPUTS + + CONCAT_INPUTS.out.file_out + .mix(fastqs_branched.samples) + .set{ mixed_reads } + + emit: + reads = mixed_reads +}