From 2dfd839141befd234757cf15fa27bd39a341725c Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Fri, 31 May 2024 18:50:57 -0700 Subject: [PATCH] fix provenance (#22) --- main.nf | 45 ++++++++++++++++++++++------------- modules/alignment_variants.nf | 6 ++++- modules/provenance.nf | 35 ++++++++++++++------------- 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/main.nf b/main.nf index 01e933b..f9f3c58 100644 --- a/main.nf +++ b/main.nf @@ -26,11 +26,15 @@ include { pipeline_provenance } from './modules/provenance.nf' include { collect_provenance } from './modules/provenance.nf' workflow { + ch_workflow_metadata = Channel.value([ + workflow.sessionId, + workflow.runName, + workflow.manifest.name, + workflow.manifest.version, + workflow.start, + ]) - ch_pipeline_name = Channel.of(workflow.manifest.name) - ch_pipeline_version = Channel.of(workflow.manifest.version) - - ch_pipeline_provenance = pipeline_provenance(ch_pipeline_name.combine(ch_pipeline_version)) + ch_pipeline_provenance = pipeline_provenance(ch_workflow_metadata) if (params.samplesheet_input != 'NO_FILE') { ch_illumina_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['R1'], it['R2']] }.filter{ it -> it[1] != null || it[2] != null } @@ -109,19 +113,26 @@ workflow { // [sample_id, [provenance_file_1.yml, provenance_file_2.yml, provenance_file_3.yml...]] // At each step, we add another provenance file to the list using the << operator... // ...and then concatenate them all together in the 'collect_provenance' process. - ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] } - ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(minimap2.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(qualimap_bamqc.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(samtools_mpileup.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(samtools_stats.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(freebayes.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] } + ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + // Check the length of the collected nanopore sample IDs to determine if we need to collect nanopore provenance + // I need to pull the value out of the channel and convert it to a list to get the length + ch_illumina_sample_ids.toList().size().view() + if (false) { + ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + } + ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + if (false) { + ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(minimap2.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + } + ch_provenance = ch_provenance.join(qualimap_bamqc.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(samtools_mpileup.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(samtools_stats.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(freebayes.out.provenance).map{ it -> [it[0], it[1] << it[2]] } collect_provenance(ch_provenance) diff --git a/modules/alignment_variants.nf b/modules/alignment_variants.nf index 34b6f01..65639dc 100644 --- a/modules/alignment_variants.nf +++ b/modules/alignment_variants.nf @@ -54,14 +54,18 @@ process bwa_mem { printf -- " parameters:\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " - parameter: -m\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml + if [ ! "${params.skip_alignment_cleaning}" == "true" ]; then printf -- " - parameter: -r\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml + fi printf -- " - tool_name: samtools\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " tool_version: \$(samtools 2>&1 | grep 'Version' | cut -d ' ' -f 2)\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " subcommand: markdup\\n" >> ${sample_id}_bwa_mem_provenance.yml + if [ ! "${params.skip_alignment_cleaning}" == "true" ]; then printf -- " parameters:\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " - parameter: -r\\n" >> ${sample_id}_bwa_mem_provenance.yml printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml + fi bwa mem \ -t ${bwa_threads} \ @@ -190,7 +194,7 @@ process samtools_stats { tag { sample_id + ' / ' + short_long } - publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_${short_long}_samtools_stats*" + publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_${short_long}_samtools_stats*.{txt,tsv,csv}" input: tuple val(sample_id), val(short_long), path(alignment) diff --git a/modules/provenance.nf b/modules/provenance.nf index af4218d..f2a0e2d 100644 --- a/modules/provenance.nf +++ b/modules/provenance.nf @@ -5,34 +5,37 @@ process pipeline_provenance { executor 'local' input: - tuple val(pipeline_name), val(pipeline_version) + tuple val(session_id), val(run_name), val(pipeline_name), val(pipeline_version), val(timestamp_analysis_start) output: file("pipeline_provenance.yml") script: """ - printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml - printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml - """ + printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml + printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml + printf -- " nextflow_session_id: ${session_id}\\n" >> pipeline_provenance.yml + printf -- " nextflow_run_name: ${run_name}\\n" >> pipeline_provenance.yml + printf -- " timestamp_analysis_start: ${timestamp_analysis_start}\\n" >> pipeline_provenance.yml + """ } process collect_provenance { - tag { sample_id } - - executor 'local' + tag { sample_id } + + executor 'local' - publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_*_provenance.yml", mode: 'copy' + publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_*_provenance.yml", mode: 'copy' - input: - tuple val(sample_id), path(provenance_files) + input: + tuple val(sample_id), path(provenance_files) - output: - tuple val(sample_id), file("${sample_id}_*_provenance.yml") + output: + tuple val(sample_id), file("${sample_id}_*_provenance.yml") - script: - """ - cat ${provenance_files} > ${sample_id}_\$(date +%Y%m%d%H%M%S)_provenance.yml - """ + script: + """ + cat ${provenance_files} > ${sample_id}_\$(date +%Y%m%d%H%M%S)_provenance.yml + """ }