Skip to content

Commit

Permalink
improve prepare_genome subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Sep 19, 2023
1 parent 519f412 commit b3427df
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 117 deletions.
3 changes: 2 additions & 1 deletion modules/local/gtf2bed/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ process GTF2BED {
'biocontainers/r-base:3.5.0'}"

input:
path gtf
tuple val(meta), path(gtf)
val feature_type

output:
path '*.bed' , emit: bed
path "versions.yml", emit: versions
Expand Down
131 changes: 22 additions & 109 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
@@ -1,136 +1,49 @@
//
// Uncompress and prepare reference genome files
// Prepare reference genome files
//

include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main'
include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main'
include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GFFREAD } from '../../../modules/nf-core/gffread/main'
include { GTF2BED } from '../../../modules/local/gtf2bed'
include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip/main'
include { GUNZIP as GUNZIP_GENE_BED } from '../../../modules/nf-core/gunzip/main'
include { GUNZIP as GUNZIP_GFF } from '../../../modules/nf-core/gunzip/main'
include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip/main'
include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main'
include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main'
include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar/main'

workflow PREPARE_GENOME {
take:
fasta // file: /path/to/genome.fasta
// gtf // file: /path/to/genome.gtf
// gff // file: /path/to/genome.gff
// exon_bed // file: /path/to/gene.bed
ch_exon_bed // file: /path/to/gene.bed
ch_fasta // file: /path/to/genome.fasta
ch_gff // file: /path/to/genome.gff
ch_gtf // file: /path/to/genome.gtf
feature_type
// prepare_tool_indices
// feature_type

main:
ch_versions = Channel.empty()

ch_fasta = fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }

//
// Uncompress genome fasta file if required
//
// if (fasta.endsWith('.gz')) {
// ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map{ meta, fasta -> fasta }
// ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
// } else {
// ch_fasta = Channel.value(file(fasta))
// }

//
// Uncompress GTF annotation file or create from GFF3 if required
//
// if (gtf) {
// if (gtf.endsWith('.gz')) {
// ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map{ meta, gtf -> gtf }
// ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
// } else ch_gtf = Channel.value(file(gtf))
// } else if (gff) {
// if (gff.endsWith('.gz')) {
// ch_gff = GUNZIP_GFF([[:], gff]).gunzip.map{ meta, gff -> gff }
// ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
// } else ch_gff = Channel.value(file(gff))
// ch_gtf = GFFREAD(ch_gff).gtf
// ch_versions = ch_versions.mix(GFFREAD.out.versions)
// }

//
// Uncompress exon BED annotation file or create from GTF if required
//
// if (exon_bed) {
// if (exon_bed.endsWith('.gz')) {
// GUNZIP_GENE_BED(
// Channel.fromPath(exon_bed).map{ it -> [[id:it[0].baseName], it] }
// )
// ch_exon_bed = GUNZIP_GENE_BED.out.gunzip.map{ meta, bed -> [bed] }.collect()
// ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
// } else {
// ch_exon_bed = Channel.fromPath(exon_bed).collect()
// }
// } else {
// ch_exon_bed = GTF2BED( ch_gtf , feature_type).bed.collect()
// ch_versions = ch_versions.mix(GTF2BED.out.versions)
// }

//ch_exon_bed.view()
//ch_exon_bed.map{ it -> [[id:'exome'], it] }
//ch_exon_bed.view()
// Bedtools sort
// ch_bedtools_sort = BEDTOOLS_SORT(ch_exon_bed.map{ it -> [[id:'exome'], it] }, 'sorted').sorted.collect()
// ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions)


// // Bedtools merge
// ch_bedtools_merge = BEDTOOLS_MERGE(ch_bedtools_sort).bed
// ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions)


// Index the genome fasta
GATK4_CREATESEQUENCEDICTIONARY(ch_fasta)
GFFREAD(ch_gff)
SAMTOOLS_FAIDX(ch_fasta, [['id':null], []])

ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
ch_gtf = ch_gtf.mix(GFFREAD.out.gtf)

// Create dictionary file for the genome fasta
// ch_fasta_dict = Channel.empty()
// if (params.dict) ch_fasta_dict = Channel.fromPath(params.dict).collect()
// else ch_fasta_dict = GATK4_CREATESEQUENCEDICTIONARY(ch_fasta).dict

//
// Uncompress STAR index or generate from scratch if required
//
// ch_star_index = Channel.empty()
// if ('star' in prepare_tool_indices) {
// if (params.star_index) {
// if (params.star_index.endsWith('.tar.gz')) {
// UNTAR_STAR_INDEX(
// Channel.fromPath(params.star_index).map{ it -> [[id:it[0].baseName], it] }
// )
// ch_star_index = UNTAR_STAR_INDEX.out.untar.map{ meta, star_index -> [star_index] }.collect()
// ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
// } else {
// ch_star_index = Channel.fromPath(params.star_index).collect()
// }
// }
// else {
// STAR_GENOMEGENERATE(
// ch_fasta,ch_gtf
// )
// .index
// .set { ch_star_index }
// ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
// }
// }
GTF2BED(ch_gtf, feature_type)
STAR_GENOMEGENERATE(ch_fasta, ch_gtf)

ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
ch_versions = ch_versions.mix(GFFREAD.out.versions)
ch_versions = ch_versions.mix(GTF2BED.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)

emit:
// fasta = ch_fasta // path: genome.fasta
fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai
dict = ch_fasta_dict // path: genome.fasta.dict
// gtf = ch_gtf // path: genome.gtf
// exon_bed = ch_exon_bed // path: exon.bed
dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict
exon_bed = GTF2BED.out.bed.collect() // path: exon.bed
fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai
gtf = ch_gtf // path: genome.gtf
star_index = STAR_GENOMEGENERATE.out.index // path: star/index/
versions = ch_versions // channel: [ versions.yml ]
// bedtools_sort = ch_bedtools_sort // path: sort.bed
// bedtools_merge = ch_bedtools_merge // path: merge.bed
// star_index = ch_star_index // path: star/index/
versions = ch_versions // channel: [ versions.yml ]
}
18 changes: 11 additions & 7 deletions workflows/rnavar.nf
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/


// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty()
ch_exon_bed = params.exon_bed ? Channel.fromPath(params.exon_bed) : Channel.empty()
ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map{ fasta -> [ [ id:fasta.baseName ], fasta ] }.first() : Channel.empty()
ch_gff = params.gff ? Channel.fromPath(params.gff).first() : Channel.empty()
ch_gtf = params.gtf ? Channel.fromPath(params.gtf).map{ gtf -> [ [ id:gtf.baseName ], gtf ] }.first() : Channel.empty()

/*
========================================================================================
Expand All @@ -129,20 +132,21 @@ workflow RNAVAR {

// To gather all QC reports for MultiQC
ch_reports = Channel.empty()

// To gather used softwares versions for MultiQC
ch_versions = Channel.empty()

//
// SUBWORKFLOW: Uncompress and prepare reference genome files
// Prepare reference genome files
//

PREPARE_GENOME(
ch_fasta
// params.gtf,
// params.gff,
// params.gene_bed,
ch_exon_bed,
ch_fasta,
ch_gff,
ch_gtf,
// params.aligner,
// params.feature_type
params.feature_type
)

// ch_genome_bed = Channel.from([id:'genome.bed']).combine(PREPARE_GENOME.out.exon_bed)
Expand Down

0 comments on commit b3427df

Please sign in to comment.