Skip to content

Commit

Permalink
fix provenance (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika authored Jun 1, 2024
1 parent fc4684e commit 2dfd839
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 34 deletions.
45 changes: 28 additions & 17 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ include { pipeline_provenance } from './modules/provenance.nf'
include { collect_provenance } from './modules/provenance.nf'

workflow {
ch_workflow_metadata = Channel.value([
workflow.sessionId,
workflow.runName,
workflow.manifest.name,
workflow.manifest.version,
workflow.start,
])

ch_pipeline_name = Channel.of(workflow.manifest.name)
ch_pipeline_version = Channel.of(workflow.manifest.version)

ch_pipeline_provenance = pipeline_provenance(ch_pipeline_name.combine(ch_pipeline_version))
ch_pipeline_provenance = pipeline_provenance(ch_workflow_metadata)

if (params.samplesheet_input != 'NO_FILE') {
ch_illumina_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['R1'], it['R2']] }.filter{ it -> it[1] != null || it[2] != null }
Expand Down Expand Up @@ -109,19 +113,26 @@ workflow {
// [sample_id, [provenance_file_1.yml, provenance_file_2.yml, provenance_file_3.yml...]]
// At each step, we add another provenance file to the list using the << operator...
// ...and then concatenate them all together in the 'collect_provenance' process.
ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(minimap2.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(qualimap_bamqc.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(samtools_mpileup.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(samtools_stats.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(freebayes.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
// Check the length of the collected nanopore sample IDs to determine if we need to collect nanopore provenance
// I need to pull the value out of the channel and convert it to a list to get the length
ch_illumina_sample_ids.toList().size().view()
if (false) {
ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
}
ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
if (false) {
ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(minimap2.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
}
ch_provenance = ch_provenance.join(qualimap_bamqc.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(samtools_mpileup.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(samtools_stats.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(freebayes.out.provenance).map{ it -> [it[0], it[1] << it[2]] }

collect_provenance(ch_provenance)

Expand Down
6 changes: 5 additions & 1 deletion modules/alignment_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,18 @@ process bwa_mem {
printf -- " parameters:\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " - parameter: -m\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml
if [ ! "${params.skip_alignment_cleaning}" == "true" ]; then
printf -- " - parameter: -r\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml
fi
printf -- " - tool_name: samtools\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " tool_version: \$(samtools 2>&1 | grep 'Version' | cut -d ' ' -f 2)\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " subcommand: markdup\\n" >> ${sample_id}_bwa_mem_provenance.yml
if [ ! "${params.skip_alignment_cleaning}" == "true" ]; then
printf -- " parameters:\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " - parameter: -r\\n" >> ${sample_id}_bwa_mem_provenance.yml
printf -- " value: null\\n" >> ${sample_id}_bwa_mem_provenance.yml
fi
bwa mem \
-t ${bwa_threads} \
Expand Down Expand Up @@ -190,7 +194,7 @@ process samtools_stats {

tag { sample_id + ' / ' + short_long }

publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_${short_long}_samtools_stats*"
publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_${short_long}_samtools_stats*.{txt,tsv,csv}"

input:
tuple val(sample_id), val(short_long), path(alignment)
Expand Down
35 changes: 19 additions & 16 deletions modules/provenance.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,37 @@ process pipeline_provenance {
executor 'local'

input:
tuple val(pipeline_name), val(pipeline_version)
tuple val(session_id), val(run_name), val(pipeline_name), val(pipeline_version), val(timestamp_analysis_start)

output:
file("pipeline_provenance.yml")

script:
"""
printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml
printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml
"""
printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml
printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml
printf -- " nextflow_session_id: ${session_id}\\n" >> pipeline_provenance.yml
printf -- " nextflow_run_name: ${run_name}\\n" >> pipeline_provenance.yml
printf -- " timestamp_analysis_start: ${timestamp_analysis_start}\\n" >> pipeline_provenance.yml
"""
}

process collect_provenance {

tag { sample_id }

executor 'local'
tag { sample_id }
executor 'local'

publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_*_provenance.yml", mode: 'copy'
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_*_provenance.yml", mode: 'copy'

input:
tuple val(sample_id), path(provenance_files)
input:
tuple val(sample_id), path(provenance_files)

output:
tuple val(sample_id), file("${sample_id}_*_provenance.yml")
output:
tuple val(sample_id), file("${sample_id}_*_provenance.yml")

script:
"""
cat ${provenance_files} > ${sample_id}_\$(date +%Y%m%d%H%M%S)_provenance.yml
"""
script:
"""
cat ${provenance_files} > ${sample_id}_\$(date +%Y%m%d%H%M%S)_provenance.yml
"""
}

0 comments on commit 2dfd839

Please sign in to comment.