Skip to content

Commit

Permalink
Merge pull request #28 from karinlag/202207-v2
Browse files Browse the repository at this point in the history
202207 v2
  • Loading branch information
karinlag authored Aug 21, 2022
2 parents 1252383 + 7ad3455 commit 7bc9dce
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 45 deletions.
27 changes: 0 additions & 27 deletions asm_annot.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ else {
version = "v0.2.0 local"
}

// TODO I need to incorporate some options for prokka here
// Also strip genome

log.info ''
log.info "================================================="
Expand Down Expand Up @@ -231,7 +229,6 @@ process run_pilon {

output:
set pair_id, file("${pair_id}_pilon_spades.*") into pilon_results
set pair_id, file("${pair_id}_pilon_spades.fasta") into to_prokka
file "${pair_id}_pilon_spades.fasta" into asms_for_quast

"""
Expand All @@ -242,29 +239,6 @@ process run_pilon {
"""
}

/*
* Annotation using PROKKA
*/
// Annotates one assembly per sample with prokka.
// Consumes (pair_id, fasta) tuples from the to_prokka channel and emits every
// file matching "${pair_id}.*" twice: keyed by pair_id into annotation_results
// and unkeyed into annotation_multiqc.
process run_prokka {
// Results are published under <out_dir>/prokka using the configured save mode.
publishDir "${params.out_dir}/prokka", mode: "${params.savemode}"
tag { pair_id }

input:
// The fasta is staged under the fixed name the script below passes to prokka.
set pair_id, file("${pair_id}_pilon_spades.fasta") from to_prokka

output:
set pair_id, file("${pair_id}.*") into annotation_results
file "${pair_id}.*" into annotation_multiqc

// prokka writes into the task directory (--outdir .) with --prefix ${pair_id},
// which is what the "${pair_id}.*" output globs rely on.
// Centre, locus tag, genus, species, kingdom and any extra options come from params.
"""
prokka --compliant --force --usegenus --cpus $task.cpus \
--centre ${params.centre} --prefix ${pair_id} --locustag ${params.locustag} \
--genus ${params.genus} --species ${params.species} \
--kingdom ${params.kingdom} --strain ${pair_id}_prokka_info ${params.prokka_additional} \
--outdir . ${pair_id}_pilon_spades.fasta
"""
}

/*
* Evaluate ALL assemblies with QUAST
*/
Expand Down Expand Up @@ -297,7 +271,6 @@ process run_multiqc_final {
file "bbduk/*" from bbduk_stats_stripped_multiqc.collect()
file "bbduk_trimmed/*" from bbduk_trimmed_multiqc.collect()
file "bbduk_trimmed_fastqc/*" from fastqc_bbduk_trimmed_multiqc.collect()
file "prokka/*" from annotation_multiqc.collect()
file quast_evaluation_all from quast_multiqc

output:
Expand Down
247 changes: 247 additions & 0 deletions assm_annot_noprokka.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
#!/usr/bin/env nextflow

// This script is part of the Bifrost pipeline. Please see
// the accompanying LICENSE document for licensing issues,
// and the WIKI for this repo for instructions.

// Version string shown in the banner: the workflow revision when a commit id
// is available, otherwise the literal "local".
version = workflow.commitId ? "v0.2.0 ${workflow.revision}" : "v0.2.0 local"

// TODO (carried over): incorporate options for prokka / strip genome.
// NOTE(review): this script defines no prokka process, so this TODO may be stale.

// Start-up banner with the parameters that decide which reads are processed
// and where results are written.
log.info('')
log.info('=================================================')
log.info(" Bifrost assembly module version ${version}")
log.info('=================================================')
log.info("Reads : ${params.reads}")
log.info("#files in read set : ${params.setsize}")
log.info("Results can be found in : ${params.out_dir}")
log.info('=================================================')
log.info('')

// Input channel: read sets grouped by sample id, params.setsize files per set.
// The run aborts if nothing matches params.reads. The channel is split in two:
// one copy for FastQC (fastqc_reads), one for the assembly track (read_pairs).
Channel
    .fromFilePairs(params.reads, size: params.setsize)
    .ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
    .into { fastqc_reads; read_pairs }

// run_fastqc and run_multiqc are exactly the same as in qc_track
process run_fastqc {
    // Runs FastQC on one read set and emits the per-sample result directory
    // (named after pair_id) into fastqc_results.
    tag { pair_id }
    label 'one'
    publishDir "${params.out_dir}/fastqc", mode: "${params.savemode}"

    input:
    set pair_id, file(reads) from fastqc_reads

    output:
    file "${pair_id}" into fastqc_results

    script:
    // FastQC needs the output directory to exist, hence the mkdir.
    """
    mkdir ${pair_id}
    fastqc -q ${reads} -o ${pair_id} -t ${task.cpus}
    """
}

process run_multiqc {
    // Aggregates all FastQC result directories into a single MultiQC report.
    tag { 'multiqc' }
    label 'one'
    publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}"

    input:
    // toSortedList() collects every FastQC directory into one emission, so this
    // process runs once; the directories are staged under fastqc_output/.
    file "fastqc_output/*" from fastqc_results.toSortedList()

    output:
    file "multiqc_report.html" into multiqc_report

    script:
    """
    multiqc fastqc_output
    """
}

// if there are more than two data files, we need to cat them together
// because spades becomes complicated with more than two files
process collate_data {
    // Concatenates all R1 files and all R2 files of a sample into one file each.
    // Deliberately has no publishDir: publishing would leave a third copy of
    // the read data on the system.
    label 'one'
    tag { pair_id }

    input:
    set pair_id, file(reads) from read_pairs

    output:
    // The same (pair_id, concatenated reads) tuple is sent to two channels:
    // `reads` for the stripping step and `pilon_reads` for the mapping step.
    set pair_id, file("${pair_id}*_concat.fq.gz") into (reads, pilon_reads)

    script:
    // The shell globs select files by the R1 / R2 marker in their names.
    """
    cat ${pair_id}*R1* > ${pair_id}_R1_concat.fq.gz
    cat ${pair_id}*R2* > ${pair_id}_R2_concat.fq.gz
    """
}


/*
* Strip PhiX reads with BBDuk (bbduk.sh, part of the BBMap suite)
*/
process run_strip {
    // Runs bbduk.sh on the concatenated R1/R2 reads against the reference in
    // params.stripgenome. The out1/out2 files are passed on as reads_stripped;
    // the bbduk console output is kept as a log file.
    tag { pair_id }
    publishDir "${params.out_dir}/bbduk", mode: "${params.savemode}"

    input:
    set pair_id, file(reads) from reads

    output:
    set pair_id, file("${pair_id}*_concat_stripped.fq.gz") into reads_stripped
    file "${pair_id}_bbduk_output.log"

    script:
    // NOTE(review): stats.txt and the outm file are written but not declared
    // as outputs, so they are not published. Confirm that is intended.
    """
    bbduk.sh threads=${task.cpus} ref=${params.stripgenome} \
        in1=${pair_id}_R1_concat.fq.gz \
        in2=${pair_id}_R2_concat.fq.gz \
        outm=${pair_id}_matched.fq.gz \
        out1=${pair_id}_R1_concat_stripped.fq.gz \
        out2=${pair_id}_R2_concat_stripped.fq.gz \
        k=31 hdist=1 stats=stats.txt &> ${pair_id}_bbduk_output.log
    """
}


/*
* Remove adapter sequences and low quality base pairs with Trimmomatic
*/
process run_trim {
    // Runs Trimmomatic in paired-end mode on the stripped reads, then renames
    // the paired outputs to the pipeline's R1/R2 naming and merges the two
    // unpaired outputs into a single "S" file.
    tag { pair_id }
    publishDir "${params.out_dir}/bbduk_trimmed", mode: "${params.savemode}"

    input:
    set pair_id, file(reads) from reads_stripped

    output:
    // The glob picks up the R1, R2 and S files created by the script.
    set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into reads_trimmed
    file "${pair_id}_concat_stripped_trimmed.log"

    script:
    // Trimming settings (adapters, clip options, sliding window, leading,
    // trailing and minimum length) all come from params.
    // Console output goes to ${pair_id}_run.log, which is not a declared output.
    """
    trimmomatic PE -threads ${task.cpus} -trimlog ${pair_id}_concat_stripped_trimmed.log ${pair_id}*_concat_stripped.fq.gz \
        -baseout ${pair_id}_trimmed.fq.gz ILLUMINACLIP:${params.adapter_dir}/${params.adapters}:${params.illuminaClipOptions} \
        SLIDINGWINDOW:${params.slidingwindow} \
        LEADING:${params.leading} TRAILING:${params.trailing} \
        MINLEN:${params.minlen} &> ${pair_id}_run.log
    mv ${pair_id}_trimmed_1P.fq.gz ${pair_id}_R1_concat_stripped_trimmed.fq.gz
    mv ${pair_id}_trimmed_2P.fq.gz ${pair_id}_R2_concat_stripped_trimmed.fq.gz
    cat ${pair_id}_trimmed_1U.fq.gz ${pair_id}_trimmed_2U.fq.gz > ${pair_id}_S_concat_stripped_trimmed.fq.gz
    """
}


/*
* Build assembly with SPAdes
*/
process run_spadesasm {
    // Assembles the trimmed reads (R1, R2 and the single-read S file) with
    // SPAdes, then filters the scaffolds by params.min_contig_len.
    tag { pair_id }
    label 'longtime'
    publishDir "${params.out_dir}/spades", mode: "${params.savemode}"

    input:
    set pair_id, file(reads) from reads_trimmed

    output:
    // The length-filtered scaffolds go to both the pilon step (assembly_results)
    // and the mapping step (tobwa_results).
    set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta") into (assembly_results, tobwa_results)
    // Unfiltered scaffolds and the SPAdes log are declared so they get published.
    file "${pair_id}_spades_scaffolds.fasta"
    file "${pair_id}_spades.log"

    script:
    // filter_fasta_length.py is a helper script, not part of SPAdes. The two cp
    // commands pull the scaffolds and log out of the SPAdes output directory
    // under sample-specific names.
    """
    spades.py ${params.careful} --cov-cutoff=${params.cov_cutoff} \
        -1 ${pair_id}_R1_concat_stripped_trimmed.fq.gz \
        -2 ${pair_id}_R2_concat_stripped_trimmed.fq.gz \
        -s ${pair_id}_S_concat_stripped_trimmed.fq.gz -t ${task.cpus} -o ${pair_id}_spades
    filter_fasta_length.py -i ${pair_id}_spades/scaffolds.fasta \
        -o ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \
        -m ${params.min_contig_len}
    cp ${pair_id}_spades/scaffolds.fasta ${pair_id}_spades_scaffolds.fasta
    cp ${pair_id}_spades/spades.log ${pair_id}_spades.log
    """
}

// Pilon polishing takes two steps: first map the reads to the assembly
// (run_bwamem), then run pilon on the mapping (run_pilon).

/*
* Map reads to the spades assembly
*/
process run_bwamem {
    // Indexes the filtered SPAdes scaffolds, maps the sample's reads to them
    // with bwa mem, and produces a sorted, indexed BAM.
    tag { pair_id }
    label 'longtime'
    publishDir "${params.out_dir}/bwamem", mode: "${params.savemode}"

    input:
    // The assembly is joined with the concatenated reads on pair_id.
    set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta"), \
        file(reads) from tobwa_results.join(pilon_reads)

    output:
    set pair_id, file("${pair_id}_mapped_sorted.bam"), \
        file("${pair_id}_mapped_sorted.bam.bai") into bwamem_results

    script:
    // The reads are passed to bwa through the *.fq.gz shell glob rather than
    // the `reads` variable, so R1/R2 order follows the shell's glob sorting.
    """
    bwa index ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta
    bwa mem -t ${task.cpus} ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \
        *.fq.gz | samtools sort -o ${pair_id}_mapped_sorted.bam -
    samtools index ${pair_id}_mapped_sorted.bam
    """
}

/*
* Polish the SPAdes assembly with pilon, using the reads mapped by run_bwamem
*/

process run_pilon {
    // Runs pilon on the filtered SPAdes scaffolds using the sorted BAM from
    // run_bwamem. All "${pair_id}_pilon_spades.*" files are emitted, and the
    // polished fasta is additionally sent on to QUAST.
    tag { pair_id }
    label 'longtime'
    publishDir "${params.out_dir}/pilon", mode: "${params.savemode}"

    input:
    // The BAM and its index are joined with the assembly on pair_id.
    set pair_id, file("${pair_id}_mapped_sorted.bam"), \
        file("${pair_id}_mapped_sorted.bam.bai"), \
        file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta") \
        from bwamem_results.join(assembly_results)

    output:
    set pair_id, file("${pair_id}_pilon_spades.*") into pilon_results
    file "${pair_id}_pilon_spades.fasta" into asms_for_quast

    script:
    // NOTE(review): task.javaopts is expanded unquoted into the export, so a
    // value containing spaces would break it. Confirm how javaopts is configured.
    """
    export _JAVA_OPTIONS=${task.javaopts}
    pilon --threads ${task.cpus} --genome ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \
        --bam ${pair_id}_mapped_sorted.bam --output ${pair_id}_pilon_spades \
        --changes --vcfqe &> ${pair_id}_pilon_spades.log
    """
}

/*
* Evaluate ALL assemblies with QUAST
*/
process quast_eval {
    // Runs QUAST once over ALL polished assemblies and emits the resulting
    // quast_evaluation_all directory.
    //
    // QUAST's output is itself a directory, so it is published directly under
    // params.out_dir rather than in a new sub-directory.
    publishDir "${params.out_dir}/", mode: "${params.savemode}"

    // This process has no pair_id input (it takes the whole list of
    // assemblies), so the previous `tag { pair_id }` referenced a variable that
    // does not exist here. Use a fixed tag, as run_multiqc does.
    tag { "quast" }

    input:
    // toSortedList() gathers every assembly into one emission, so QUAST runs once.
    file asm_list from asms_for_quast.toSortedList()

    output:
    // The directory name is quoted so it is always a literal path. As a bare
    // identifier it had the same name as the target channel, which is ambiguous.
    // TODO (carried over): output is "not going anywhere" -- nothing in this
    // script reads the quast_evaluation_all channel; the directory is only published.
    file "quast_evaluation_all" into quast_evaluation_all

    // NOTE(review): conf/conda.config selects the QUAST environment with
    // `withName: run_quast`, which does not match this process name (quast_eval).
    // Confirm the environment is applied some other way.
    script:
    """
    quast --threads ${task.cpus} -o quast_evaluation_all \
        -g ${params.quast_genes} -R ${params.quast_ref} ${asm_list}
    """
}
2 changes: 0 additions & 2 deletions bin/printversions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if [[ "$STR" == *"$SUBSTR"* ]]; then
conda list -n bifrost2022-spades >> ${output_file}
conda list -n bifrost2022-bwa >> ${output_file}
conda list -n bifrost2022-pilon >> ${output_file}
conda list -n bifrost2022-prokka >> ${output_file}
conda list -n bifrost2022-quast >> ${output_file}
conda list -n bifrost2022-ariba >> ${output_file}

Expand All @@ -28,7 +27,6 @@ if [[ "$STR" == *"$SUBSTR"* ]]; then
rm tmpfile
samtools --version |head -1 >> ${output_file}
pilon --version >> ${output_file}
prokka -v >> ${output_file}
quast -v >> ${output_file}
ariba version |head -1 >> ${output_file}

Expand Down
13 changes: 1 addition & 12 deletions conf/asm_annot_template.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ params.savemode = "copy"

// Directory to where data is stored
params.data_dir = "/cluster/projects/nn9305k/vi_pipeline_data/bifrost_data"
params.multiqc_config = "/cluster/projects/nn9305k/Bifrost22/conf/multiqc_config.yaml"
params.multiqc_config = "/cluster/projects/nn9305k/vi_src/Bifrost22/conf/multiqc_config.yaml"

// BBDuk params, has to be absolute paths
params.stripgenome = "${params.data_dir}/genome_references/genomes/PhiX/PhiX.fasta"
Expand All @@ -47,17 +47,6 @@ params.assembly = "spades_asm"
params.cov_cutoff = "auto"
params.min_contig_len = "500"

// PROKKA configuration variables
params.locustag = "locustag"
params.genus = "Escherichia"
params.species = "coli"
params.kingdom = "Bacteria"
// If you have additional options to set, put them here.
params.prokka_additional = ""
params.centre = "NVI"



// QUAST variables
params.quast_ref = "${params.data_dir}/genome_references/genomes/ecoli/GCF_000005845.2_ASM584v2_genomic.fna"
params.quast_genes = "${params.data_dir}/genome_references/genomes/ecoli/GCF_000005845.2_ASM584v2_genomic.gff"
4 changes: 0 additions & 4 deletions conf/conda.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ process {
conda = "${params.condahome}/bifrost2022-pilon"
}

withName: run_prokka {
conda = "${params.condahome}/bifrost2022-prokka"
}

withName: run_quast {
conda = "${params.condahome}/bifrost2022-quast"
}
Expand Down

0 comments on commit 7bc9dce

Please sign in to comment.