Skip to content

Commit

Permalink
stash apply
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Jun 10, 2024
1 parent 818fea8 commit c0dbaa5
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 66 deletions.
41 changes: 18 additions & 23 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,29 +57,18 @@ workflow NFCORE_RNAVAR {

ch_versions = Channel.empty()

// Initialize fasta file with meta map:
ch_fasta_raw = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty()

// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
ch_dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
ch_known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
ch_gff = params.gff ? Channel.fromPath(params.gff).collect() : Channel.empty()
ch_gtf_raw = params.gtf ? Channel.fromPath(params.gtf).map{ gtf -> [ [ id:gtf.baseName ], gtf ] }.collect() : Channel.empty()
ch_dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
ch_known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])

// Initialize variant annotation associated channels
// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
snpeff_db = params.snpeff_db ?: Channel.empty()
vep_cache_version = params.vep_cache_version ?: Channel.empty()
vep_genome = params.vep_genome ?: Channel.empty()
vep_species = params.vep_species ?: Channel.empty()

seq_platform = params.seq_platform ?: []
seq_center = params.seq_center ?: []

// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
snpeff_db = params.snpeff_db ?: Channel.empty()
vep_cache_version = params.vep_cache_version ?: Channel.empty()
vep_genome = params.vep_genome ?: Channel.empty()
vep_species = params.vep_species ?: Channel.empty()
seq_platform = params.seq_platform ?: []
seq_center = params.seq_center ?: []

vep_extra_files = []

Expand All @@ -95,16 +84,22 @@ workflow NFCORE_RNAVAR {
vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true))
}

PREPARE_GENOME(
ch_fasta_raw,
ch_gff,
ch_gtf_raw,
ch_dbsnp,
ch_known_indels,
params.feature_type)
PREPARE_GENOME (
params.fasta,
params.gtf,
params.gff,
params.additional_fasta,
params.transcript_fasta,
params.gene_bed,
params.star_index,
params.gencode,
params.featurecounts_group_type,
params.skip_gtf_filter
)

ch_fasta = PREPARE_GENOME.out.fasta
ch_star_index = PREPARE_GENOME.out.star_index
ch_gff = PREPARE_GENOME.out.gff
ch_dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
: PREPARE_GENOME.out.dict
ch_fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).map{ it -> [ [id:'fai'], it ] }.collect()
Expand Down
193 changes: 153 additions & 40 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
@@ -1,80 +1,193 @@
//
// Prepare reference genome files
// Uncompress and prepare reference genome files
//

include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main'
include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main'
include { CUSTOM_CATADDITIONALFASTA } from '../../../modules/nf-core/custom/catadditionalfasta'
include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes'
include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GFFREAD } from '../../../modules/nf-core/gffread/main'
include { GTF2BED } from '../../../modules/local/gtf2bed'
include { GTF_FILTER } from '../../../modules/local/gtf_filter'
include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip'
include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip'
include { GUNZIP as GUNZIP_GENE_BED } from '../../../modules/nf-core/gunzip'
include { GUNZIP as GUNZIP_GFF } from '../../../modules/nf-core/gunzip'
include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip'
include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gunzip'
include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main'
include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main'
include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip/main'
include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip/main'
include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../../modules/nf-core/tabix/tabix/main'
include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar'

workflow PREPARE_GENOME {
take:
ch_fasta_raw // file: /path/to/genome.fasta
ch_gff // file: /path/to/genome.gff
ch_gtf_raw // file: /path/to/genome.gtf
ch_dbsnp
ch_known_indels
feature_type
fasta // file: /path/to/genome.fasta
gtf // file: /path/to/genome.gtf
gff // file: /path/to/genome.gff
additional_fasta // file: /path/to/additional.fasta
transcript_fasta // file: /path/to/transcript.fasta
gene_bed // file: /path/to/gene.bed
dbsnp // file: /path/to/dbsnp.vcf
known_indels // file: /path/to/known_indels.vcf
star_index // directory: /path/to/star/index/
featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts
gencode // boolean: whether the genome is from GENCODE
skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs

main:
ch_versions = Channel.empty()

//Unzip reference genome files if needed

if (params.fasta.endsWith('.gz')) {
GUNZIP_FASTA(ch_fasta_raw)
//
// Uncompress genome fasta file if required
//
if (fasta.endsWith('.gz')) {
ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
} else {
ch_fasta = Channel.value(file(fasta))
}

ch_fasta = GUNZIP_FASTA.out.gunzip
//
// Uncompress GTF annotation file or create from GFF3 if required
//
if (gtf || gff) {
if (gtf) {
if (gtf.endsWith('.gz')) {
ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = Channel.value(file(gtf))
}
} else if (gff) {
if (gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = Channel.value(file(gff))
}
ch_gtf = GFFREAD ( ch_gff ).gtf
ch_versions = ch_versions.mix(GFFREAD.out.versions)
}

} else {
ch_fasta = ch_fasta_raw
// Determine whether to filter the GTF or not
def filter_gtf =
((
// Condition 1: Transcript FASTA file is not provided
!transcript_fasta
)) &&
(
// Condition 2: --skip_gtf_filter is not provided
!skip_gtf_filter
)
if (filter_gtf) {
GTF_FILTER ( ch_fasta, ch_gtf )
ch_gtf = GTF_FILTER.out.genome_gtf
ch_versions = ch_versions.mix(GTF_FILTER.out.versions)
}
}

if (params.gtf.endsWith('.gz')) {
GUNZIP_GTF(ch_gtf_raw)
//
// Uncompress additional fasta file and concatenate with reference fasta and gtf files
//
def biotype = gencode ? "gene_type" : featurecounts_group_type
if (additional_fasta) {
if (additional_fasta.endsWith('.gz')) {
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions)
} else {
ch_add_fasta = Channel.value(file(additional_fasta))
}

ch_gtf = GUNZIP_GTF.out.gunzip
CUSTOM_CATADDITIONALFASTA (
ch_fasta.combine(ch_gtf).map { fasta, gtf -> [ [:], fasta, gtf ] },
ch_add_fasta.map { [ [:], it ] },
biotype
)
ch_fasta = CUSTOM_CATADDITIONALFASTA.out.fasta.map { it[1] }.first()
ch_gtf = CUSTOM_CATADDITIONALFASTA.out.gtf.map { it[1] }.first()
ch_versions = ch_versions.mix(CUSTOM_CATADDITIONALFASTA.out.versions)
}

//
// Uncompress gene BED annotation file or create from GTF if required
//
if (gene_bed) {
if (gene_bed.endsWith('.gz')) {
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
ch_gene_bed = Channel.value(file(gene_bed))
}
} else {
ch_gene_bed = GTF2BED ( ch_gtf ).bed
ch_versions = ch_versions.mix(GTF2BED.out.versions)
}

//
// Uncompress transcript fasta file / create if required
//
if (transcript_fasta) {
if (transcript_fasta.endsWith('.gz')) {
ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions)
} else {
ch_transcript_fasta = Channel.value(file(transcript_fasta))
}
if (gencode) {
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta )
ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta
ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions)
}
} else {
ch_gtf = ch_gtf_raw
ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_gtf ).transcript_fasta
ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)
}

//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)

GATK4_CREATESEQUENCEDICTIONARY(ch_fasta)
GFFREAD(ch_gff, ch_fasta)
SAMTOOLS_FAIDX(ch_fasta, [['id':'genome'], []])
TABIX_DBSNP(ch_dbsnp.flatten().map{ it -> [ [ id:it.baseName ], it ] })
TABIX_KNOWN_INDELS(ch_known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } )

ch_gtf = ch_gtf.mix(GFFREAD.out.gtf)

GTF2BED(ch_gtf, feature_type)
STAR_GENOMEGENERATE(ch_fasta, ch_gtf)

ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
ch_versions = ch_versions.mix(GFFREAD.out.versions)
ch_versions = ch_versions.mix(GTF2BED.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions)
ch_versions = ch_versions.mix(TABIX_KNOWN_INDELS.out.versions)

//
// Uncompress STAR index or generate from scratch if required
//
ch_star_index = Channel.empty()
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = Channel.value(file(star_index))
}
} else {
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] } ).index.map { it[1] }
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}

emit:
dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict
exon_bed = GTF2BED.out.bed.map{ bed -> [ [ id:bed.baseName ], bed ] }.collect() // path: exon.bed
fasta = ch_fasta
fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai
gtf = ch_gtf // path: genome.gtf
star_index = STAR_GENOMEGENERATE.out.index // path: star/index/
chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi
dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict
fai = ch_fai // channel: path(genome.fai)
fasta = ch_fasta // channel: path(genome.fasta)
gene_bed = ch_gene_bed // channel: path(gene.bed)
gtf = ch_gtf // channel: path(genome.gtf)
known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi
versions = ch_versions // channel: [ versions.yml ]
// bedtools_sort = ch_bedtools_sort // path: sort.bed
// bedtools_merge = ch_bedtools_merge // path: merge.bed
star_index = ch_star_index // channel: path(star/index/)
transcript_fasta = ch_transcript_fasta // channel: path(transcript.fasta)
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
}
6 changes: 3 additions & 3 deletions workflows/rnavar/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ workflow RNAVAR {

// Gather QC reports
ch_reports = ch_reports.mix(ch_bqsr_table.map{ meta, table -> table})
ch_versions = ch_versions.mix(GATK4_BASERECALIBRATOR.out.versions)
ch_versions = ch_versions.mix(GATK4_BASERECALIBRATOR.out.versions)

ch_bam_applybqsr = ch_splitncigar_bam_bai.join(ch_bqsr_table)
ch_bam_recalibrated_qc = Channel.empty()
Expand Down Expand Up @@ -256,8 +256,8 @@ workflow RNAVAR {
ch_dbsnp_for_haplotypecaller = [[id:'null'], []]
ch_dbsnp_for_haplotypecaller_tbi = [[id:'null'], []]
} else {
ch_dbsnp_for_haplotypecaller = ch_dbsnp.map{ vcf -> [[id:'dbsnp'], vcf] }
ch_dbsnp_for_haplotypecaller_tbi = ch_dbsnp_tbi.map{ tbi -> [[id:'dbsnp'], tbi] }
ch_dbsnp_for_haplotypecaller = ch_dbsnp.map{ it -> [[id:it.baseName], it] },
ch_dbsnp_for_haplotypecaller_tbi = ch_dbsnp_tbi.map{ it -> [[id:it.baseName], it] }
}

ch_haplotypecaller_vcf = Channel.empty()
Expand Down

0 comments on commit c0dbaa5

Please sign in to comment.