From 08d86b4830fd0ecafc9e9011592c8c5b593c6568 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 7 Nov 2023 14:11:36 -0500 Subject: [PATCH 01/11] feat: octopus --- docker/logan_base/Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index bf16a99..d6bc17d 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -164,6 +164,15 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3 && rm /opt2/VarDict-1.8.3.tar ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH" +# Install Octopus/v0.7.4 +RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \ + && tar -xvzf /opt2/v0.7.4.tar.gz \ + && rm /opt2/v0.7.4.tar.gz \ + && cd /opt2/octopus-0.7.4 \ + && cmake . +ENV PATH="/opt2/octopus-0.7.4/bin:$PATH" + + # Fastp From Opengene github RUN wget http://opengene.org/fastp/fastp.0.23.2 \ && mkdir fastp \ From e683183ee39332bcee04301677228f295793e44b Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 7 Nov 2023 14:12:11 -0500 Subject: [PATCH 02/11] feat: added caller --- docker/logan_base/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/logan_base/meta.yml b/docker/logan_base/meta.yml index b1a3033..c2ad4cf 100644 --- a/docker/logan_base/meta.yml +++ b/docker/logan_base/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: dnousome image_name: ccbr_logan_base -version: v0.3.1 +version: v0.3.2 container: "$(dockerhub_namespace)/$(image_name):$(version)" From 9cfbe5d9f634856d9fbb0a1336cfcf6471e6a2d0 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 8 Nov 2023 13:55:57 -0500 Subject: [PATCH 03/11] fix: SV fixes --- logan | 2 +- main.nf | 11 ++-- nextflow.config | 14 +++++- workflow/modules/qc.nf | 61 ++++++++++++++++++++--- workflow/modules/structural_variant.nf | 18 +++---- workflow/modules/variant_calling.nf | 2 +- workflow/modules/variant_calling_tonly.nf | 7 +-- workflow/modules/workflows.nf | 
14 ++++-- workflow/modules/workflows_tonly.nf | 32 +++++++----- 9 files changed, 118 insertions(+), 43 deletions(-) diff --git a/logan b/logan index 5d3bafd..3b397ff 100755 --- a/logan +++ b/logan @@ -230,7 +230,7 @@ def main(): outswarmmut=args.output+"_"+time1+'.slurm' with open(outswarmmut, "a") as outfile: outfile.write(code+"\n") - sbatch_mut="sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"+time1+".log --error error_"+time1+".log --mail-type=BEGIN,TIME_LIMIT_90,END "+outswarmmut + sbatch_mut="sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"+time1+".log --error error_"+time1+".log --mail-type=BEGIN,END "+outswarmmut sbatch_out='kickoff_'+time1+'.sh' with open(sbatch_out, "a") as outfile: outfile.write(sbatch_mut+"\n") diff --git a/main.nf b/main.nf index ab67050..a277c5d 100644 --- a/main.nf +++ b/main.nf @@ -28,7 +28,7 @@ PIPE_BAMSV=params.PIPE_BAMCNV PIPE_BAMCNV=params.PIPE_BAMCNV PIPE_TONLY_BAMVC=params.PIPE_TONLY_BAMVC -PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMCNV +PIPE_TONLY_BAMSV=params.PIPE_TONLY_BAMSV PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV @@ -160,11 +160,16 @@ workflow { } if (PIPE_TONLY_BAMSV){ INPUT_TONLY_BAM() - SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) } if (PIPE_TONLY_BAMCNV){ INPUT_TONLY_BAM() - CNV_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + if (params.genome == "mm10"){ + CNV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + }else if (params.genome== "hg38"){ + CNV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + + } } } diff --git a/nextflow.config b/nextflow.config index 15093e7..c03dcaa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -52,6 +52,7 @@ params { PIPE_BAMVC_TONLY=null PIPE_TONLY_BAMVC=null + PIPE_TONLY_BAMSV=null PIPE_TONLY_BAMCNV=null PIPE_TONLY_QC=null @@ -76,7 +77,7 @@ profiles { 
withLabel: process_highcpu { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' } - withLabel: proces_highmem { + withLabel: process_highmem { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' } withLabel: process_somaticcaller { @@ -103,6 +104,9 @@ profiles { withName: 'annotvep_tn|annotvep_tonly' { container = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' } + withName: 'octopus_tn|octopus_tonly' { + container = 'docker://dancooke/octopus:latest' + } } @@ -223,6 +227,12 @@ profiles { time= 12.h cpus=16 } + withName: 'octopus_tn|octopus_tonly' { + container = 'docker://dancooke/octopus:latest' + memory=54.GB + time=24.h + cpus=16 + } //Global Processes withLabel: process_low { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' @@ -240,8 +250,8 @@ profiles { withLabel: process_highcpu { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' memory = 54.GB - cpus = 16 time = 72.h + cpus = 16 } withLabel: process_highmem { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' diff --git a/workflow/modules/qc.nf b/workflow/modules/qc.nf index 32b9039..e951cdf 100644 --- a/workflow/modules/qc.nf +++ b/workflow/modules/qc.nf @@ -525,7 +525,7 @@ process somalier_extract { """ } -process somalier_analysis { +process somalier_analysis_human { /* To estimate relatedness, Somalier uses extracted site information to compare across all samples. 
This step also runs the ancestry estimation @@ -535,14 +535,8 @@ process somalier_analysis { @Output: Separate tab-separated value (TSV) files with relatedness and ancestry outputs - ancestry_db = config['references']['SOMALIER']['ANCESTRY_DB'], - sites_vcf = config['references']['SOMALIER']['SITES_VCF'], - genomeFasta = config['references']['GENOME'], - script_path_gender = config['scripts']['genderPrediction'], - script_path_samples = config['scripts']['combineSamples'], - script_path_pca = config['scripts']['ancestry'], */ - label 'process_mid' + label 'process_low' publishDir("${outdir}/QC/somalier", mode: 'copy') @@ -596,6 +590,57 @@ process somalier_analysis { """ } +process somalier_analysis_mouse { + /* + To estimate relatedness, Somalier uses extracted site information to + compare across all samples. This step also runs the ancestry estimation + function in Somalier. + @Input: + Exracted sites in (binary) somalier format for ALL samples in the cohort + @Output: + Separate tab-separated value (TSV) files with relatedness and ancestry outputs + + */ + label 'process_low' + + publishDir("${outdir}/QC/somalier", mode: 'copy') + + input: + path(somalierin) + + output: + tuple path("relatedness.pairs.tsv"), + path("relatedness.samples.tsv"), + path("predicted.genders.tsv"), + path("predicted.pairs.tsv") + + script: + """ + echo "Estimating relatedness" + somalier relate \ + -o "relatedness" \ + $somalierin + + Rscript $SCRIPT_PATH_GENDER \ + relatedness.samples.tsv \ + predicted.genders.tsv + + Rscript $SCRIPT_PATH_SAMPLES \ + relatedness.pairs.tsv \ + predicted.pairs.tsv + + """ + + stub: + + """ + touch relatedness.pairs.tsv + touch relatedness.samples.tsv + touch predicted.genders.tsv + touch predicted.pairs.tsv + + """ +} process multiqc { diff --git a/workflow/modules/structural_variant.nf b/workflow/modules/structural_variant.nf index 13c6e1d..2f62e6c 100644 --- a/workflow/modules/structural_variant.nf +++ b/workflow/modules/structural_variant.nf @@ 
-149,8 +149,6 @@ process manta_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}.diplodSV.vcf.gz"), - path("${tumor.simpleName}.somaticSV.vcf.gz"), path("${tumor.simpleName}.candidateSV.vcf.gz"), path("${tumor.simpleName}.candidateSmallIndels.vcf.gz"), path("${tumor.simpleName}.tumorSV.vcf.gz") @@ -198,10 +196,10 @@ process svaba_tonly { path("${tumor.simpleName}.contigs.bam"), path("${tumor.simpleName}.discordant.txt.gz"), path("${tumor.simpleName}.alignments.txt.gz"), - path("${tumor.simpleName}.svaba.somatic.indel.vcf"), - path("${tumor.simpleName}.svaba.somatic.sv.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf"), - path("${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf"), + path("${tumor.simpleName}.svaba.indel.vcf"), + path("${tumor.simpleName}.svaba.sv.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.indel.vcf"), + path("${tumor.simpleName}.svaba.unfiltered.sv.vcf"), path("${tumor.simpleName}.log") @@ -217,10 +215,10 @@ process svaba_tonly { touch "${tumor.simpleName}.contigs.bam" touch "${tumor.simpleName}.discordant.txt.gz" touch "${tumor.simpleName}.alignments.txt.gz" - touch "${tumor.simpleName}.svaba.somatic.indel.vcf" - touch "${tumor.simpleName}.svaba.somatic.sv.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.somatic.indel.vcf" - touch "${tumor.simpleName}.svaba.unfiltered.somatic.sv.vcf" + touch "${tumor.simpleName}.svaba.indel.vcf" + touch "${tumor.simpleName}.svaba.sv.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.indel.vcf" + touch "${tumor.simpleName}.svaba.unfiltered.sv.vcf" touch "${tumor.simpleName}.log" """ diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index aba8c61..b3c0b5f 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -385,7 +385,7 @@ process varscan_tn { } process octopus_tn { - label 'process_highcpu' + //label 'process_highcpu' Using separate docker for octopus input: tuple val(tumorname), 
path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 992f78d..ee90876 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -17,6 +17,7 @@ outdir=file(params.output) process pileup_paired_tonly { label 'process_highmem' + input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) @@ -211,7 +212,7 @@ process mutect2filter_tonly { --output ${sample}.tonly.mut2.final.vcf.gz bcftools sort ${sample}.tonly.mut2.final.vcf.gz -@ $task.cpus -Oz |\ - bcftools norm --threads 16 --check-ref s -f $GENOMEREF -O v |\ + bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.tonly.mut2.norm.vcf.gz @@ -300,7 +301,7 @@ process vardict_tonly { process octopus_tonly { - label 'process_highcpu' + //label 'process_highcpu' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) @@ -314,7 +315,7 @@ process octopus_tonly { """ octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP -t ${bed} \ - --somatic-forest $SOMATIC_FOREST \ + $SOMATIC_FOREST \ -o ${tumorname}_${bed.simpleName}.octopus.vcf --threads $task.cpus """ diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index dfb52f4..4e8549e 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -6,7 +6,8 @@ include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc; samtools_flagstats;vcftools;collectvariantcallmetrics; bcftools_stats;gatk_varianteval; snpeff; - somalier_extract;somalier_analysis;multiqc} from './qc.nf' + somalier_extract;somalier_analysis_human;somalier_analysis_mouse; + multiqc} from './qc.nf' include {fastp; bwamem2; //indelrealign; bqsr; gatherbqsr; applybqsr; samtoolsindex} from './trim_align.nf' @@ -391,7 +392,6 @@ workflow QC_NOGL { 
//Somalier somalier_extract(applybqsr) som_in=somalier_extract.out.collect() - somalier_analysis(som_in) //Prep for MultiQC input fclane_out=fc_lane.out.map{samplename,info->info}.collect() @@ -402,7 +402,15 @@ workflow QC_NOGL { fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() samtools_flagstats_out=samtools_flagstats.out.collect() - somalier_analysis_out=somalier_analysis.out.collect() + + if(params.genome=="hg38"){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome=="mm10"){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out, somalier_analysis_out).flatten().toList() diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index fd513d5..1df25aa 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -6,7 +6,8 @@ include {fc_lane; fastq_screen;kraken;qualimap_bamqc; samtools_flagstats;vcftools;collectvariantcallmetrics; bcftools_stats;gatk_varianteval; snpeff;fastqc; - somalier_extract;somalier_analysis;multiqc} from './qc.nf' + somalier_extract;somalier_analysis_human;somalier_analysis_mouse; + multiqc} from './qc.nf' include {deepvariant_step1;deepvariant_step2;deepvariant_step3; deepvariant_combined;glnexus} from './germline.nf' @@ -217,10 +218,11 @@ workflow SV_TONLY { //Manta manta_out=manta_tonly(bamwithsample) - .map{tumor,gsv,so_sv,unfil_sv,unfil_indel,tumorSV -> - tuple(tumor,so_sv,"manta")} + .map{tumor, sv, indel, tumorsv -> + tuple(tumor,tumorsv,"manta")} annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") + //Delly } workflow CNV_TONLY { @@ -257,7 +259,14 @@ workflow QC_TONLY { somalier_extract(bqsrout) som_in=somalier_extract.out.collect() - somalier_analysis(som_in) + if(params.genome=="hg38"){ + somalier_analysis_human(som_in) + 
somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome=="mm10"){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } //Prep for MultiQC input fclane_out=fc_lane.out.map{samplename,info->info}.collect() @@ -268,7 +277,8 @@ workflow QC_TONLY { fastqc_out=fastqc.out.map{samplename,html,zip->tuple(html,zip)}.collect() samtools_flagstats_out=samtools_flagstats.out.collect() - somalier_analysis_out=somalier_analysis.out.collect() + + conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,fastqc_out, samtools_flagstats_out, @@ -285,23 +295,22 @@ workflow QC_TONLY { //Variant Calling from BAM only workflow INPUT_TONLY_BAM { main: - //Either BAM Input or File sheet input if(params.bam_input){ baminputonly=Channel.fromPath(params.bam_input) - .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( samplename)}.view() + }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,bam -> - tuple(sample, file(bam),file("${bam}.bai")) + .map{ sample,bam,bai -> + tuple(sample, file(bam),file(bai)) } + sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( - samplename)}.view() - + samplename)} } splitinterval(intervalbedin) @@ -313,6 +322,5 @@ workflow INPUT_TONLY_BAM { splitout=splitinterval.out sample_sheet - } From de2398147d7eb524f530b2d710b20a5c30f96977 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 13 Nov 2023 16:23:35 -0500 Subject: [PATCH 04/11] fix: cnv freec changes to match wg --- workflow/modules/copynumber.nf | 32 ++++++++++++--------- workflow/modules/structural_variant.nf | 39 ++++++++++++++++++++++++++ workflow/scripts/assess_significance.R | 29 +++++++++---------- workflow/scripts/make_freec_genome.pl | 2 +- 4 files changed, 72 insertions(+), 30 deletions(-) diff --git a/workflow/modules/copynumber.nf 
b/workflow/modules/copynumber.nf index 318fc5b..d6f5d43 100644 --- a/workflow/modules/copynumber.nf +++ b/workflow/modules/copynumber.nf @@ -143,7 +143,7 @@ process freec_paired { $FREECCHROMS \ ${tumor} \ ${normal} \ - $FREECPILE \ + $FREECPILEUP \ $GENOMEREF \ $FREECSNPS \ $FREECTARGETS @@ -152,8 +152,8 @@ process freec_paired { cat $FREECSIGNIFICANCE | \ R --slave \ - --args ${tumorname}.bam_CNVs \ - ${tumorname}.bam_ratio.txt + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt cat $FREECPLOT | \ R --slave \ @@ -167,13 +167,16 @@ process freec_paired { """ touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt touch ${tumorname}_vs_${normalname}.bam_ratio.txt - touch ${tumorname}_vs_${normalname}.bam_BAF.txt + touch ${tumorname}_vs_${normalname}.bam_BAF.txt + touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png + """ } process freec { - label 'process_highcpu' + label 'process_mid' publishDir("${outdir}/cnv/freec", mode: 'copy') input: @@ -186,7 +189,7 @@ process freec { $FREECLENGTHS \ $FREECCHROMS \ ${tumor} \ - $FREECPILE \ + $FREECPILEUP \ $GENOMEREF \ $FREECSNPS \ $FREECTARGETS @@ -195,22 +198,25 @@ process freec { cat $FREECSIGNIFICANCE | \ R --slave \ - --args ${tumorname}.bam_CNVs \ - ${tumorname}.bam_ratio.txt + --args ${tumor}_CNVs \ + ${tumor}_ratio.txt cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumorname}.bam_ratio.txt \ - ${tumorname}.bam_BAF.txt + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt """ stub: """ - touch ${tumorname}.bam_CNVs.p.value.txt - touch ${tumorname}.bam_ratio.txt - touch ${tumorname}.bam_BAF.txt + touch ${tumor}_CNVs.p.value.txt + touch ${tumor}_ratio.txt + touch ${tumor}_BAF.txt + touch ${tumor}_ratio.txt.log2.png + touch ${tumor}_ratio.txt.png + """ } diff --git a/workflow/modules/structural_variant.nf b/workflow/modules/structural_variant.nf index 2f62e6c..c8327cf 100644 --- a/workflow/modules/structural_variant.nf +++ b/workflow/modules/structural_variant.nf @@ -224,3 
+224,42 @@ process svaba_tonly { """ } + + + + +process annotsv_tonly { + //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files + //Requires bedtools,bcftools + + module = ['annotsv/3.3.1'] + publishDir(path: "${outdir}/SVtonly/annotated", mode: 'copy') + + input: + tuple val(tumorname), path(somaticvcf), val(sv) + + output: + tuple val(tumorname), + path("${sv}/${tumorname}.tsv"), + path("${sv}/${tumorname}.unannotated.tsv") + + + script: + """ + mkdir ${sv} + + AnnotSV -SVinputFile ${somaticvcf} \ + -genomeBuild $GENOME \ + -SVinputInfo 1 -outputFile ${tumorname} \ + -outputDir ${sv} + + """ + + stub: + """ + mkdir ${sv} + + touch "${sv}/${tumorname}.tsv" + touch "${sv}/${tumorname}.unannotated.tsv" + """ +} \ No newline at end of file diff --git a/workflow/scripts/assess_significance.R b/workflow/scripts/assess_significance.R index 4193d97..7f7216c 100644 --- a/workflow/scripts/assess_significance.R +++ b/workflow/scripts/assess_significance.R @@ -34,27 +34,24 @@ normals <- subsetByOverlaps(ratio.bed,normals) #qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2) numberOfCol=length(cnvs) - +wscore=c() +kscore=c() for (i in c(1:length(cnvs[,1]))) { - values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i])) - #wilcox.test(values,mu=mu) - W <- function(values,normals){resultw <- try(wilcox.test(values,score(normals)), silent = TRUE) - if(class(resultw)=="try-error") return(list("statistic"=NA,"parameter"=NA,"p.value"=NA,"null.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultw} - KS <- function(values,normals){resultks <- try(ks.test(values,score(normals)), silent = TRUE) - if(class(resultks)=="try-error") return(list("statistic"=NA,"p.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultks} - #resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE) - # if(class(resultks)=="try-error") NA) else resultks - cnvs[i,numberOfCol+1]=W(values,normals)$p.value - 
cnvs[i,numberOfCol+2]=KS(values,normals)$p.value - } - -if (numberOfCol==5) { - names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") +values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i])) +resultw <- class(try(wilcox.test(values,score(normals)), silent = TRUE)) +ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values,score(normals))$p.value)) +resultks <- class(try(ks.test(values,score(normals)), silent = TRUE)) +ifelse(resultks == "try-error",kscore <- c(kscore, "NA"),kscore <- c(kscore, ks.test(values,score(normals))$p.value)) } +cnvs = cbind(cnvs, as.numeric(wscore), as.numeric(kscore)) + if (numberOfCol==7) { - names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") + names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") } if (numberOfCol==9) { + names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") +} +if (numberOfCol==11) { names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") } write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F) diff --git a/workflow/scripts/make_freec_genome.pl b/workflow/scripts/make_freec_genome.pl index 165fb11..28aec4c 100644 --- a/workflow/scripts/make_freec_genome.pl +++ b/workflow/scripts/make_freec_genome.pl @@ -23,7 +23,7 @@ print C "chrLenFile = $chrLenFile\n"; print C "ploidy = 2,3,4\nbreakPointThreshold = 0.8\nwindow = 50000\n"; print C "chrFiles = $chrFiles\n"; -print C "minimalSubclonePresence = 20\nmaxThreads = 8\n"; +print C "minimalSubclonePresence = 20\nmaxThreads = 4\n"; print C "outputDir = $ARGV[0]\n\n"; print C '[sample]' . 
"\n\n"; From e2d8c6b232f4f8c728c7e293f1fc40cc80688eac Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Mon, 13 Nov 2023 16:23:50 -0500 Subject: [PATCH 05/11] fix: allow combine variants --- logan | 19 +++-- main.nf | 16 ++--- nextflow.config | 3 +- workflow/modules/variant_calling.nf | 86 +++++++++++++++++++++-- workflow/modules/variant_calling_tonly.nf | 16 +++-- workflow/modules/workflows.nf | 44 ++++++++---- workflow/modules/workflows_tonly.nf | 39 ++++++---- 7 files changed, 170 insertions(+), 53 deletions(-) diff --git a/logan b/logan index 3b397ff..19780bc 100755 --- a/logan +++ b/logan @@ -78,16 +78,22 @@ def main(): elif args.file_input: in1="--file_input "+args.file_input elif args.bam: - in1="--bam "+args.bam + in1="--bam_input '"+args.bam+"'" baminput=True else: print("Missing sample sheet for paired mode or you would like Tumor only mode?") alignmode='--PIPE_ALIGN' - if args.vc: + if args.vc and baminput: + vcmode="--PIPE_BAMVC" + elif args.vc: vcmode="--PIPE_VC" - if args.sv: + if args.sv and baminput: + svmode="--PIPE_BAMSV" + elif args.sv: svmode="--PIPE_SV" - if args.cnv: + if args.cnv and baminput: + cnvmode="--PIPE_BAMCNV" + elif args.cnv: cnvmode="--PIPE_CNV" if args.germline: germmode="--PIPE_GERMLINE" @@ -202,7 +208,10 @@ def main(): time1=time.strftime("%Y_%m_%d_%H%M") stubbase=" -stub -without-podman T -without-conda -without-docker" if args.stub: - cmd1_stub=cmd1 + stubbase + if not 'baminput' in locals(): + cmd1_stub=cmd1 + stubbase + else: + cmd1_stub="" if (args.vc): cmd2_stub=cmd2 + stubbase else: diff --git a/main.nf b/main.nf index a277c5d..9edcaf2 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,7 @@ PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV include {INPUT; ALIGN; GL; - VC; INPUT_BAMVC; SV; CNVmm10; CNVhg38; + VC; INPUT_BAM; SV; CNVmm10; CNVhg38; QC_GL; QC_NOGL} from "./workflow/modules/workflows.nf" @@ -103,19 +103,19 @@ workflow { } } if (PIPE_BAMVC){ - INPUT_BAMVC() - 
VC(INPUT_BAMVC.out.bamwithsample,INPUT_BAMVC.out.splitout,INPUT_BAMVC.out.sample_sheet) + INPUT_BAM() + VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) } if (PIPE_BAMSV){ - INPUT_BAMVC() - SV(INPUT_BAMVC.out.bamwithsample) + INPUT_BAM() + SV(INPUT_BAM.out.bamwithsample) } if (PIPE_BAMCNV){ - INPUT_BAMVC() + INPUT_BAM() if (params.genome == "mm10"){ - CNVmm10(INPUT_BAMVC.out.bamwithsample) + CNVmm10(INPUT_BAM.out.bamwithsample) } else if (params.genome== "hg38"){ - CNVhg38(INPUT_BAMVC.out.bamwithsample) + CNVhg38(INPUT_BAM.out.bamwithsample) } } diff --git a/nextflow.config b/nextflow.config index c03dcaa..2f094e7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -229,7 +229,7 @@ profiles { } withName: 'octopus_tn|octopus_tonly' { container = 'docker://dancooke/octopus:latest' - memory=54.GB + memory=70.GB time=24.h cpus=16 } @@ -239,7 +239,6 @@ profiles { memory = 16.GB time = 12.h cpus = 2 - } withLabel: process_mid { container = 'docker://dnousome/ccbr_logan_base:v0.3.1' diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index b3c0b5f..b7d2518 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -227,7 +227,8 @@ process mutect2filter { tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) output: tuple val(sample), path("${sample}.mut2.marked.vcf.gz"), - path("${sample}.mut2.norm.vcf.gz"), path("${sample}.marked.vcf.gz.filteringStats.tsv") + path("${sample}.mut2.norm.vcf.gz"), + path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv") script: //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz @@ -252,17 +253,18 @@ process mutect2filter { --exclude-filtered \ --output ${sample}.mut2.final.vcf.gz - bcftools sort ${sample}.mut2.final.vcf.gz -@ $task.cpus -Oz |\ + bcftools sort ${sample}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus 
--check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ - sed '/^\$/d' > ${sample}.mut2.norm.vcf.gz + sed '/^\$/d' > ${sample}.mut2.norm.vcf |\ + bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz """ stub: """ touch ${sample}.mut2.marked.vcf.gz touch ${sample}.mut2.norm.vcf.gz - touch ${sample}.marked.vcf.gz.filteringStats.tsv + touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv """ @@ -388,8 +390,10 @@ process octopus_tn { //label 'process_highcpu' Using separate docker for octopus input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + output: tuple val(tumorname), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf") @@ -398,6 +402,7 @@ process octopus_tn { """ octopus -R $GENOMEREF -I ${normal} ${tumor} --normal-sample ${normalname} \ + -C cancer \ --annotations AC AD DP -t ${bed} \ --threads $task.cpus \ $GERMLINE_FOREST \ @@ -431,7 +436,7 @@ process combineVariants { """ mkdir ${vc} gatk --java-options "-Xmx48g" MergeVcfs \ - -O ${sample}.${vc}.temp.vcf.gz\ + -O ${sample}.${vc}.temp.vcf.gz \ -D $GENOMEDICT \ -I $vcfin bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz @@ -456,6 +461,75 @@ process combineVariants { } + +process bcftools_index_octopus { + label 'process_low' + + input: + tuple val(sample), + path(vcf) + + output: + tuple val(sample), + path(vcf), + path("${vcf}.tbi") + + script: + """ + bcftools index -t ${vcf} + """ + + stub: + """ + touch ${vcf} + touch ${vcf}.tbi + """ + +} + +process combineVariants_octopus { + label 'process_highmem' + publishDir(path: "${outdir}/vcfs/", mode: 'copy') + + input: + tuple val(sample), path(vcfs), path(vcfsindex), val(vc) + + output: + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") + 
+ script: + vcfin = vcfs.join(" ") + + """ + mkdir ${vc} + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz + bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + + mv ${sample}.${vc}.marked.vcf.gz ${vc} + """ + + stub: + + """ + mkdir ${vc} + touch ${vc}/${sample}.${vc}.marked.vcf.gz + touch ${vc}/${sample}.${vc}.norm.vcf.gz + + """ + +} + + + + + + process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels label 'process_mid' diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index ee90876..33d5009 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -187,7 +187,8 @@ process mutect2filter_tonly { tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), - path("${sample}.tonly.mut2.norm.vcf.gz"), path("${sample}.tonly.marked.vcf.gz.filteringStats.tsv") + path("${sample}.tonly.mut2.norm.vcf.gz"), + path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz @@ -211,10 +212,11 @@ process mutect2filter_tonly { --exclude-filtered \ --output ${sample}.tonly.mut2.final.vcf.gz - bcftools sort ${sample}.tonly.mut2.final.vcf.gz -@ $task.cpus -Oz |\ + bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ - sed '/^\$/d' > ${sample}.tonly.mut2.norm.vcf.gz + 
sed '/^\$/d' |\ + bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz """ @@ -222,7 +224,7 @@ process mutect2filter_tonly { """ touch ${sample}.tonly.mut2.marked.vcf.gz touch ${sample}.tonly.mut2.norm.vcf.gz - touch ${sample}.tonly.marked.vcf.gz.filteringStats.tsv + touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv """ } @@ -308,7 +310,7 @@ process octopus_tonly { output: tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.octopus.vcf") + path("${tumorname}_${bed.simpleName}.octopus.vcf.gz") script: @@ -316,14 +318,14 @@ process octopus_tonly { octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP -t ${bed} \ $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.octopus.vcf --threads $task.cpus + -o ${tumorname}_${bed.simpleName}.octopus.vcf.gz --threads $task.cpus """ stub: """ - touch ${tumorname}_${bed.simpleName}.octopus.vcf + touch ${tumorname}_${bed.simpleName}.octopus.vcf.gz """ } diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 4e8549e..1556045 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -18,10 +18,10 @@ include {deepvariant_step1;deepvariant_step2;deepvariant_step3; include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel;mergemut2stats; strelka_tn; combineVariants_strelka; - varscan_tn; vardict_tn; octopus_tn; + varscan_tn; vardict_tn; octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; - combineVariants as combineVariants_octopus; combineVariants as combineVariants_octopus_tonly; + combineVariants_octopus; combineVariants_octopus as combineVariants_octopus_tonly; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; 
annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; combinemafs_tn} from './variant_calling.nf' @@ -275,19 +275,22 @@ workflow VC { .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan //Octopus_TN - octopus_comb=octopus_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"octopus")} | combineVariants_octopus + octopus_comb=octopus_tn(bambyinterval) | bcftools_index_octopus + octopus_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} | combineVariants_octopus octopus_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_octopus + .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} | annotvep_tn_octopus - //Octopus_tonly - octopus_tonly_comb=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly - octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"octopus_tonly")} | combineVariants_octopus_tonly - octopus_tonly_comb1.join(sample_sheet) + //Octopus_tonly + octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> + tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly + octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} | combineVariants_octopus_tonly + + octopus_tonly_comb.join(sample_sheet) .map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus + //Combine All Variants Using VCF and Then Reannotate //annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly @@ -471,7 +474,7 @@ workflow QC_GL { //Variant Calling from BAM only -workflow INPUT_BAMVC { +workflow INPUT_BAM { 
if(params.sample_sheet){ sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true) @@ -486,13 +489,28 @@ workflow INPUT_BAMVC { //Either BAM Input or File sheet input if(params.bam_input){ - baminputonly=Channel.fromPath(params.bam_input) + //Check if Index is .bai or .bam.bai + bambai=params.bam_input +".bai" + baionly = bambai.replace(".bam", "") + bamcheck1=file(bambai) + bamcheck2=file(baionly) + + if (bamcheck1.size()>0){ + baminputonly=Channel.fromPath(params.bam_input) .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} + } + if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + baminputonly=Channel.fromPath(params.bam_input) + .map{it-> tuple(it.simpleName,it)} + .join(bai) + } + }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) .splitCsv(header: false, sep: "\t", strip:true) - .map{ sample,bam -> - tuple(sample, file(bam),file("${bam}.bai")) + .map{ sample,bam,bai -> + tuple(sample, file(bam),file(bai)) } } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 1df25aa..cc204b5 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -16,15 +16,17 @@ include {fastp; bwamem2; bqsr; gatherbqsr; applybqsr; samtoolsindex} from './trim_align.nf' include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; - contamination_paired; learnreadorientationmodel;mergemut2stats; + bcftools_index_octopus; + contamination_paired; learnreadorientationmodel; mergemut2stats; combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; - combineVariants as combineVariants_octopus_tonly; + combineVariants_octopus ; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; combinemafs_tn} from 
'./variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; - varscan_tonly; vardict_tonly; octopus_tonly; + varscan_tonly; vardict_tonly; + octopus_tonly; contamination_tumoronly; learnreadorientationmodel_tonly; mergemut2stats_tonly; @@ -32,7 +34,7 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; combinemafs_tonly} from './variant_calling_tonly.nf' -include {manta_tonly; svaba_tonly; annotsv_tn as annotsv_manta_tonly; annotsv_tn as annotsv_svaba_tonly} from './structural_variant.nf' +include {manta_tonly; svaba_tonly; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly} from './structural_variant.nf' include {freec; purple } from './copynumber.nf' @@ -172,8 +174,7 @@ workflow VC_TONLY { mutect2filter_tonly(mut2tonly_filter) - //##VCF2MAF TO - + //vcf2maf Annotate mutect2filter_tonly.out .join(sample_sheet) .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 @@ -190,10 +191,10 @@ workflow VC_TONLY { varscan_tonly_comb.join(sample_sheet) .map{tumor,marked,normvcf ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan - //Octopus_tonly + //Octopus_tonly octopus_tonly_comb=bambyinterval.map{tumor,bam,bai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly - octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"octopus_tonly")} | combineVariants_octopus_tonly + tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus + octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex, "octopus_tonly")} | combineVariants_octopus octopus_tonly_comb1.join(sample_sheet) .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus @@ -297,10 +298,24 @@ workflow INPUT_TONLY_BAM { main: //Either BAM Input or File sheet input if(params.bam_input){ - 
baminputonly=Channel.fromPath(params.bam_input) - + bambai=params.bam_input +".bai" + baionly = bambai.replace(".bam", "") + bamcheck1=file(bambai) + bamcheck2=file(baionly) + + if (bamcheck1.size()>0){ + baminputonly=Channel.fromPath(params.bam_input) + .map{it-> tuple(it.simpleName,it,file("${it}.bai"))} + } + if (bamcheck2.size()>0){ + bai=Channel.from(bamcheck2).map{it -> tuple(it.simpleName,it)}.view() + baminputonly=Channel.fromPath(params.bam_input) + .map{it-> tuple(it.simpleName,it)} + .join(bai) + } + sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( - samplename)}.view() + samplename)} }else if(params.file_input) { baminputonly=Channel.fromPath(params.file_input) From d4ce161dee3f00d65a36f6abe0d2d9bde1c9b9be Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 14 Nov 2023 14:15:04 -0500 Subject: [PATCH 06/11] feat: additional callers --- logan | 10 +- main.nf | 26 ++--- workflow/modules/structural_variant.nf | 58 ++++++++++- workflow/modules/variant_calling.nf | 78 +++++++++++++++ workflow/modules/workflows.nf | 130 ++++++++++++++++--------- workflow/modules/workflows_tonly.nf | 71 ++++++++++---- 6 files changed, 287 insertions(+), 86 deletions(-) diff --git a/logan b/logan index 19780bc..ea0da83 100755 --- a/logan +++ b/logan @@ -83,19 +83,21 @@ def main(): else: print("Missing sample sheet for paired mode or you would like Tumor only mode?") alignmode='--PIPE_ALIGN' - if args.vc and baminput: + if args.vc and args.bam: vcmode="--PIPE_BAMVC" elif args.vc: vcmode="--PIPE_VC" - if args.sv and baminput: + if args.sv and args.bam: svmode="--PIPE_BAMSV" elif args.sv: svmode="--PIPE_SV" - if args.cnv and baminput: + if args.cnv and args.bam: cnvmode="--PIPE_BAMCNV" elif args.cnv: cnvmode="--PIPE_CNV" - if args.germline: + if args.germline and args.bam: + germmode="--PIPE_BAMGERMLINE" + elif args.germline: germmode="--PIPE_GERMLINE" if args.qc and args.germline: qcmode="--PIPE_QC_GL" diff --git a/main.nf b/main.nf index 9edcaf2..3d6259f 100644 
--- a/main.nf +++ b/main.nf @@ -33,13 +33,13 @@ PIPE_TONLY_BAMCNV=params.PIPE_TONLY_BAMCNV include {INPUT; ALIGN; GL; - VC; INPUT_BAM; SV; CNVmm10; CNVhg38; + VC; INPUT_BAM; SV; CNVmouse; CNVhuman; QC_GL; QC_NOGL} from "./workflow/modules/workflows.nf" include {INPUT_TONLY; INPUT_TONLY_BAM; ALIGN_TONLY; - VC_TONLY; SV_TONLY; CNV_TONLY; QC_TONLY } from "./workflow/modules/workflows_tonly.nf" + VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./workflow/modules/workflows_tonly.nf" log.info """\ @@ -96,9 +96,10 @@ workflow { INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) if (params.genome == "mm10"){ - CNVmm10(ALIGN.out.bamwithsample) + CNVmouse(ALIGN.out.bamwithsample) } else if (params.genome== "hg38"){ - CNVhg38(ALIGN.out.bamwithsample) + VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) + CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) } } @@ -113,9 +114,10 @@ workflow { if (PIPE_BAMCNV){ INPUT_BAM() if (params.genome == "mm10"){ - CNVmm10(INPUT_BAM.out.bamwithsample) + CNVmouse(INPUT_BAM.out.bamwithsample) } else if (params.genome== "hg38"){ - CNVhg38(INPUT_BAM.out.bamwithsample) + VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) + CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) } } @@ -140,9 +142,10 @@ workflow { INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) if (params.genome == "mm10"){ - CNV_TONLY(ALIGN_TONLY.out.bamwithsample) + CNVmouse_tonly(ALIGN_TONLY.out.bamwithsample) } else if (params.genome== "hg38"){ - CNV_TONLY(ALIGN_TONLY.out.bamwithsample) + VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) + CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) } } @@ -153,7 +156,7 @@ workflow { QC_TONLY(ALIGN_TONLY.out.fastqin,ALIGN_TONLY.out.fastpout,ALIGN_TONLY.out.bqsrout) } - //Variant Calling from BAM/Tumor Only + //Variant Calling from BAM-Tumor 
Only Mode if (PIPE_TONLY_BAMVC){ INPUT_TONLY_BAM() VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) @@ -165,9 +168,10 @@ workflow { if (PIPE_TONLY_BAMCNV){ INPUT_TONLY_BAM() if (params.genome == "mm10"){ - CNV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) }else if (params.genome== "hg38"){ - CNV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) } } diff --git a/workflow/modules/structural_variant.nf b/workflow/modules/structural_variant.nf index c8327cf..d807753 100644 --- a/workflow/modules/structural_variant.nf +++ b/workflow/modules/structural_variant.nf @@ -142,7 +142,7 @@ process annotsv_tn { process manta_tonly { label 'process_highcpu' - publishDir(path: "${outdir}/SVtonly/manta", mode: 'copy') + publishDir(path: "${outdir}/SV/manta_tonly", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -185,7 +185,7 @@ process manta_tonly { process svaba_tonly { label 'process_highcpu' - publishDir(path: "${outdir}/SVtonly/svaba", mode: 'copy') + publishDir(path: "${outdir}/SV/svaba_tonly", mode: 'copy') input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -225,7 +225,59 @@ process svaba_tonly { } +process gunzip { + input: + tuple val(tumorname), + path(vcf), val(sv) + + output: + tuple val(tumorname), + path("${tumorname}.tumorSV.vcf"), val(sv) + + script: + """ + gunzip ${vcf} > ${tumorname}.tumorSV.vcf + """ + + stub: + + """ + touch ${tumorname}.tumorSV.vcf + """ + +} + + +process survivor_sv { + module = ['survivor'] + publishDir(path: "${outdir}/SV/survivor", mode: 'copy') + + input: + tuple val(tumorname), + path(vcfs),val(svs) + + output: + tuple val(tumorname), + path("${tumorname}_merged.vcf"), + val("survivor") + + + script: + strin = 
vcfs.join("\\n") + + """ + echo -e '$strin' > filelistin + SURVIVOR merge filelistin 1000 2 1 1 1 30 ${tumorname}_merged.vcf + """ + + stub: + strin = vcfs.join("\\n") + """ + echo -e '$strin' > filelistin + touch "${tumorname}_merged.vcf" + """ +} process annotsv_tonly { @@ -233,7 +285,7 @@ process annotsv_tonly { //Requires bedtools,bcftools module = ['annotsv/3.3.1'] - publishDir(path: "${outdir}/SVtonly/annotated", mode: 'copy') + publishDir(path: "${outdir}/SV/annotated_tonly", mode: 'copy') input: tuple val(tumorname), path(somaticvcf), val(sv) diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index b7d2518..108e79e 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -418,6 +418,84 @@ process octopus_tn { } +process lofreq_tn { + label 'process_somaticcaller' + module=["lofreq/2.1.5","bcftools/1.17"] + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + + output: + tuple val(tumorname), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz") + + script: + + """ + lofreq -f $GENOMEREF -n ${normal} -t ${tumor} \ + -d $DBSNP \ + --threads $task.cpus \ + -l ${bed} \ + --call-indels \ + -o ${tumorname}_vs_${normalname}_${bed.simpleName} + + bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" --threads $task.cpus -Oz -o \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" + + + """ + 
+ stub: + + """ + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" + + """ +} + + + +process muse_tn { + label 'process_somaticcaller' + module=["muse/2.0.1"] + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + + output: + tuple val(tumorname), + path("${tumorname}_vs_${normalname}.vcf.gz") + + script: + + """ + MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal + MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ + -O ${tumorname}_vs_${normalname} -n $task.cpus -D $DBSNP -G + + """ + + stub: + + """ + touch "${tumorname}_vs_${normalname}.vcf.gz" + """ + +} + process combineVariants { label 'process_highmem' diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 1556045..06a6276 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -18,9 +18,11 @@ include {deepvariant_step1;deepvariant_step2;deepvariant_step3; include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel;mergemut2stats; strelka_tn; combineVariants_strelka; - varscan_tn; vardict_tn; octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; - combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; - combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; + varscan_tn; vardict_tn; lofreq_tn; muse_tn; + octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; + combineVariants 
as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; + combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; + combineVariants as combineVariants_lofreq; combineVariants as combineVariants_muse; combineVariants_octopus; combineVariants_octopus as combineVariants_octopus_tonly; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; @@ -35,7 +37,10 @@ include {mutect2_t_tonly; mutect2filter_tonly; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; combinemafs_tonly} from './variant_calling_tonly.nf' -include {svaba_somatic; manta_somatic; annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from './structural_variant.nf' +include {svaba_somatic; manta_somatic; + survivor_sv; gunzip; + annotsv_tn as annotsv_survivor_tn + annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from './structural_variant.nf' include {sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf' @@ -260,13 +265,13 @@ workflow VC { combineVariants_vardict_tonly.out.join(sample_sheet) .map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict - //VarScan + //VarScan TN varscan_in=bambyinterval.join(contamination_paired.out) varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan varscan_comb.join(sample_sheet) .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan - //VarScan_tonly + //VarScan_TOnly varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly @@ -274,14 +279,21 @@ workflow VC { 
varscan_tonly_comb1.join(sample_sheet) .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan + //Lofreq TN + lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} | combineVariants_lofreq + + //MuSE TN + muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} | combineVariants_muse + //Octopus_TN octopus_comb=octopus_tn(bambyinterval) | bcftools_index_octopus octopus_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} | combineVariants_octopus - octopus_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} | annotvep_tn_octopus + octopus_annotin=octopus_comb.join(sample_sheet) + .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} + annotvep_tn_octopus(octopus_annotin) - //Octopus_tonly + //Octopus_TOnly octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} | combineVariants_octopus_tonly @@ -289,13 +301,17 @@ workflow VC { octopus_tonly_comb.join(sample_sheet) .map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus - - //Combine All Variants Using VCF and Then Reannotate //annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly //Implement PCGR Annotator/CivIC Next + + if (params.genome=="hg38"){ + emit: + somaticcall_input=octopus_annotin + } + } @@ -316,41 +332,60 @@ workflow SV { tuple(tumor,so_sv,"manta")} annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated") + //Delly-WIP + + //Survivor + 
gunzip(manta_out).concat(svaba_out).groupTuple() + | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") + } -workflow CNVmm10 { +workflow CNVmouse { take: bamwithsample main: - if(params.genome=="mm10"){ - chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza - bamwithsample | freec_paired - } + //Sequenza (Preferred for Paired) + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | sequenza + + //FREEC Paired Mode + bamwithsample | freec_paired + } -workflow CNVhg38 { +workflow CNVhuman { take: bamwithsample - + somaticcall_input + main: - if(params.genome=="hg38"){ - chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | freec - } + //Sequenza + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | sequenza + + //FREEC + 
bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | freec + + //Purple + bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | amber + bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | cobalt + purplein=cobalt.out.join(amber.out) + purplein.join(somaticcall_input).view()| purple + } /* @@ -360,18 +395,8 @@ workflow CNVhg38 { //somaticinput=sample_sheet // .map{samplename,bam,vcf-> tuple(samplename,file(vcf))} - cobalt(baminput) - amber(baminput) - - pre1=cobalt.out.join(amber.out) - pre1.join(somaticinput).view()| purple - - - cobalt(baminput) - amber(baminput) + - pre1=cobalt.out.join(amber.out) - pre1.join(somaticinput).view()| purple */ @@ -450,9 +475,19 @@ workflow QC_GL { //Somalier somalier_extract(applybqsr) som_in=somalier_extract.out.collect() - somalier_analysis(som_in) + + //Prep for MultiQC input + if(params.genome=="hg38"){ + somalier_analysis_human(som_in) + somalier_analysis_out=somalier_analysis_human.out.collect() + } + else if(params.genome=="mm10"){ + somalier_analysis_mouse(som_in) + somalier_analysis_out=somalier_analysis_mouse.out.collect() + } + fclane_out=fc_lane.out.map{samplename,info->info}.collect() fqs_out=fastq_screen.out.collect() @@ -465,7 +500,6 @@ workflow QC_GL { snpeff_out=snpeff.out.collect()//map{vcf,csv,html->vcf,csv,html}.collect() vcftools_out=vcftools.out collectvariantcallmetrics_out=collectvariantcallmetrics.out//.map{details,summary->details,summary} - somalier_analysis_out=somalier_analysis.out.collect() conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,bcftools_stats_out, gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList() diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index cc204b5..803c8dd 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -34,14 +34,15 
@@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; combinemafs_tonly} from './variant_calling_tonly.nf' -include {manta_tonly; svaba_tonly; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly} from './structural_variant.nf' +include {manta_tonly; svaba_tonly; survivor_sv; gunzip; +annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; +annotsv_tonly as annotsv_survivor_tonly} from './structural_variant.nf' -include {freec; purple } from './copynumber.nf' +include {freec; amber; cobalt; purple } from './copynumber.nf' include {splitinterval} from "./splitbed.nf" - workflow INPUT_TONLY { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) @@ -101,9 +102,7 @@ workflow ALIGN_TONLY { tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) applybqsr(tobqsr) - //samtoolsindex(applybqsr.out) - //samtoolsindex.out.view() - //bamwithsample=samtoolsindex.out.join(sample_sheet).map{it.swap(3,0)}.join(samtoolsindex.out).map{it.swap(3,0)} + bamwithsample=applybqsr.out.join(sample_sheet) .map{samplename,tumor,tumorbai -> tuple( samplename,tumor,tumorbai) } @@ -199,10 +198,13 @@ workflow VC_TONLY { octopus_tonly_comb1.join(sample_sheet) .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus - //Combine All Final //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly + + emit: + somaticcall_input=combineVariants_octopus.out + } @@ -214,31 +216,60 @@ workflow SV_TONLY { //Svaba svaba_out=svaba_tonly(bamwithsample) .map{ tumor,bps,contigs,discord,alignments,so_indel,so_sv,unfil_so_indel,unfil_sv,log -> - tuple(tumor,so_sv,"svaba")} + tuple(tumor,so_sv,"svaba_tonly")} annotsv_svaba_tonly(svaba_out).ifEmpty("Empty SV input--No SV annotated") //Manta manta_out=manta_tonly(bamwithsample) .map{tumor, sv, indel, tumorsv -> - 
tuple(tumor,tumorsv,"manta")} + tuple(tumor,tumorsv,"manta_tonly")} annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") - //Delly + //Delly-WIP + + //Survivor + gunzip(manta_out).concat(svaba_out).groupTuple() + | survivor_sv |annotsv_survivor_tonly.out.ifEmpty("Empty SV input--No SV annotated") } -workflow CNV_TONLY { + + +workflow CNVmouse_tonly { take: bamwithsample main: - //mm10 use sequenza only, hg38 use purple - if(params.genome=="hg38"){ - purple(bamwithsample) - } - if(params.genome=="mm10"){ - freec(bamwithsample) - } - + freec(bamwithsample) +} + + +workflow CNVhuman_tonly { + take: + bamwithsample + somaticcall_input + + main: + //FREEC + bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | freec + + //Purple + bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | amber + bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple(tname,tumor,tbai)} | cobalt + purplein=cobalt.out.join(amber.out) + purplein.join(somaticcall_input).view()| purple + + //Sequenza + //chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + //seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + // tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + //seqzin.combine(chrs) | seqz_sequenza_bychr + //seqz_sequenza_bychr.out.groupTuple() + // .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + // | sequenza + } workflow QC_TONLY { @@ -313,7 +344,7 @@ workflow INPUT_TONLY_BAM { .map{it-> tuple(it.simpleName,it)} .join(bai) } - + sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( samplename)} From 73aa49aba9c5d78b033f2a7783bf2ca1c9e40fb5 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 14 Nov 2023 20:20:32 -0500 Subject: [PATCH 07/11] fix: cnv fixes --- workflow/modules/copynumber.nf | 137 +++++++++++++++++++++------- workflow/modules/variant_calling.nf | 6 +- workflow/modules/workflows.nf | 48 +++++----- workflow/modules/workflows_tonly.nf | 45 
++++----- 4 files changed, 149 insertions(+), 87 deletions(-) diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf index d6f5d43..dd91743 100644 --- a/workflow/modules/copynumber.nf +++ b/workflow/modules/copynumber.nf @@ -221,15 +221,16 @@ process freec { } +process amber_tonly { + label 'process_mid' + publishDir("${outdir}/cnv/amber", mode: 'copy') -process amber { - module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/amber", mode: 'copy') input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bam.bai") + tuple val(tumorname), path(tumor), path(tumorbai) + output: - tuple val(samplename), path("${samplename}_amber") + tuple val(tumorname), path("${tumorname}_amber") //path("${samplename}.amber.baf.tsv.gz"), //path("${samplename}.amber.baf.pcf"), //path("${samplename}.amber.qc") @@ -240,9 +241,9 @@ process amber { """ java -Xmx32G -cp $amber com.hartwig.hmftools.amber.AmberApplication \ - -tumor ${samplename} -tumor_bam ${samplename}.bqsr.bam \ - -output_dir ${samplename}_amber \ - -threads 16 \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -output_dir ${tumorname}_amber \ + -threads $task.cpus \ -ref_genome_version V38 \ -loci $GERMLINEHET @@ -251,20 +252,92 @@ process amber { stub: """ - mkdir ${samplename}_amber - touch ${samplename}_amber/${samplename}.amber.baf.tsv.gz ${samplename}_amber/${samplename}.amber.baf.pcf ${samplename}_amber/${samplename}.amber.qc + mkdir ${tumorname}_amber + touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc """ } -process cobalt { - module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/cobalt", mode: 'copy') +process amber_tn { + label 'process_mid' + publishDir("${outdir}/cnv/amber", mode: 'copy') + + input: + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) + + output: + tuple val(tumorname), 
path("${tumorname}_vs_${normalname}_amber") + //path("${samplename}.amber.baf.tsv.gz"), + //path("${samplename}.amber.baf.pcf"), + //path("${samplename}.amber.qc") + //path("${samplename}.amber.contamination.vcf.gz") Contamination maybe only with tumor + + script: + + """ + + java -Xmx32G -cp $amber com.hartwig.hmftools.amber.AmberApplication \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -reference ${normalname} -reference_bam ${normal} \ + -output_dir ${tumorname}_vs_${normalname}_amber \ + -threads $task.cpus \ + -ref_genome_version V38 \ + -loci $GERMLINEHET + + """ + + stub: + + """ + mkdir ${tumorname}_vs_${normalname}_amber + touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc + """ +} + +process cobalt_tonly { + label "process_mid" + publishDir("${outdir}/cnv/cobalt", mode: 'copy') + + input: + tuple val(tumorname), path(tumor), path(tumorbai) + + output: + tuple val(tumorname), path("${tumorname}_cobalt") + //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), + //path("${samplename}/${samplename}.cobalt.ratio.pcf"), + //path("${samplename}/${samplename}.cobalt.gc.median.tsv") + + script: + + """ + + java -jar -Xmx8G $cobalt \ + -tumor ${tumorname} -tumor_bam ${tumor} \ + -output_dir ${tumorname}_cobalt \ + -threads $task.cpus \ + -tumor_only_diploid_bed $DIPLODREG \ + -gc_profile $GCPROFILE + + """ + + stub: + + """ + mkdir ${tumorname}_cobalt + touch ${tumorname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_cobalt/${tumorname}.cobalt.gc.median.tsv + """ +} + +process cobalt_tn { + label "process_mid" + publishDir("${outdir}/cnv/cobalt", mode: 'copy') input: - tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bam.bai") + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) 
output: - tuple val(samplename), path("${samplename}_cobalt") + tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt") //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), //path("${samplename}/${samplename}.cobalt.ratio.pcf"), //path("${samplename}/${samplename}.cobalt.gc.median.tsv") @@ -274,9 +347,10 @@ process cobalt { """ java -jar -Xmx8G $cobalt \ - -tumor ${samplename} -tumor_bam ${samplename}.bqsr.bam \ - -output_dir ${samplename}_cobalt \ - -threads 16 \ + -tumor ${tumorname} -tumor_bam ${tumorname} \ + -reference ${normalname} -reference_bam ${normal} \ + -output_dir ${tumorname}_vs_${normalname}_cobalt \ + -threads $task.cpus \ -tumor_only_diploid_bed $DIPLODREG \ -gc_profile $GCPROFILE @@ -285,52 +359,53 @@ process cobalt { stub: """ - mkdir ${samplename}_cobalt - touch ${samplename}_cobalt/${samplename}.cobalt.ratio.tsv.gz ${samplename}_cobalt/${samplename}.cobalt.ratio.pcf ${samplename}_cobalt/${samplename}.cobalt.gc.median.tsv + mkdir ${tumorname}_vs_${normalname}_cobalt + touch ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.tsv.gz ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.ratio.pcf ${tumorname}_vs_${normalname}_cobalt/${tumorname}.cobalt.gc.median.tsv """ } process purple { - module=["java/12.0.1","R/3.6.3"] - - publishDir("${outdir}/purple", mode: 'copy') + label 'process_mid' + publishDir("${outdir}/cnv/purple", mode: 'copy') input: - tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz") + tuple val(tumorname), + path(cobaltin), + path(amberin), + path(somaticvcf) output: - tuple val(samplename), path("${samplename}") + tuple val(tumorname), path("${tumorname}") script: """ java -jar $purple \ - -tumor ${samplename} \ + -tumor ${tumorname} \ -amber ${amberin} \ -cobalt ${cobaltin} \ -gc_profile $GCPROFILE \ -ref_genome_version 38 \ -ref_genome $GENOME \ -ensembl_data_dir $ENSEMBLCACHE \ - -somatic_vcf ${samplename}.tonly.final.mut2.vcf.gz \ + 
-somatic_vcf ${somaticvcf} \ -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ - -output_dir ${samplename} + -output_dir ${tumorname} """ stub: """ - mkdir ${samplename} - touch ${samplename}/${samplename}.purple.cnv.somatic.tsv ${samplename}/${samplename}.purple.cnv.gene.tsv ${samplename}/${samplename}.driver.catalog.somatic.tsv + mkdir ${tumorname} + touch ${tumorname}/${tumorname}.purple.cnv.somatic.tsv ${tumorname}/${tumorname}.purple.cnv.gene.tsv ${tumorname}/${tumorname}.driver.catalog.somatic.tsv """ } - /* process ascat_tn { module=["java/12.0.1","R/3.6.3"] diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index 108e79e..d87c33c 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -396,7 +396,7 @@ process octopus_tn { output: tuple val(tumorname), - path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf") + path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") script: @@ -407,13 +407,13 @@ process octopus_tn { --threads $task.cpus \ $GERMLINE_FOREST \ $SOMATIC_FOREST \ - -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf + -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz """ stub: """ - touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" """ } diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 06a6276..b4b8646 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -26,6 +26,7 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; combineVariants_octopus; combineVariants_octopus as combineVariants_octopus_tonly; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; + annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; 
combinemafs_tn} from './variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; @@ -42,7 +43,8 @@ include {svaba_somatic; manta_somatic; annotsv_tn as annotsv_survivor_tn annotsv_tn as annotsv_svaba;annotsv_tn as annotsv_manta} from './structural_variant.nf' -include {sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf' +include {amber_tn; cobalt_tn; purple; + sequenza; seqz_sequenza_bychr; freec; freec_paired } from './copynumber.nf' include {splitinterval} from "./splitbed.nf" @@ -280,26 +282,30 @@ workflow VC { .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan //Lofreq TN - lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} | combineVariants_lofreq + lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} + | combineVariants_lofreq | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"lofreq",normvcf)} + | annotvep_tn_lofreq //MuSE TN - muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} | combineVariants_muse + muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} + | combineVariants_muse | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"muse",normvcf)} + | annotvep_tn_muse //Octopus_TN - octopus_comb=octopus_tn(bambyinterval) | bcftools_index_octopus - octopus_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} | combineVariants_octopus - octopus_annotin=octopus_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} + octopus_annotin=octopus_tn(bambyinterval) | bcftools_index_octopus + | groupTuple() |map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} + | combineVariants_octopus | join(sample_sheet)|map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} 
annotvep_tn_octopus(octopus_annotin) //Octopus_TOnly octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly - octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} | combineVariants_octopus_tonly + octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} + | combineVariants_octopus_tonly - octopus_tonly_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus + octopus_tonly_comb.join(sample_sheet) | + map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus //Combine All Variants Using VCF and Then Reannotate //annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn @@ -307,10 +313,8 @@ workflow VC { //Implement PCGR Annotator/CivIC Next - if (params.genome=="hg38"){ - emit: - somaticcall_input=octopus_annotin - } + emit: + somaticcall_input=octopus_annotin } @@ -374,17 +378,13 @@ workflow CNVhuman { .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} | sequenza - //FREEC - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | freec - //Purple - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | amber - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | cobalt - purplein=cobalt.out.join(amber.out) - purplein.join(somaticcall_input).view()| purple + bamwithsample | amber_tn + bamwithsample | cobalt_tn + purplein=amber_tn.out.join(cobalt_tn.out) + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,n1,vc,vcf -> tuple(t1,amber,cobalt,vcf)} + | purple } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 803c8dd..5143cbe 100644 --- 
a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -38,7 +38,7 @@ include {manta_tonly; svaba_tonly; survivor_sv; gunzip; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; annotsv_tonly as annotsv_survivor_tonly} from './structural_variant.nf' -include {freec; amber; cobalt; purple } from './copynumber.nf' +include {freec; amber_tonly; cobalt_tonly; purple } from './copynumber.nf' include {splitinterval} from "./splitbed.nf" @@ -141,12 +141,11 @@ workflow VC_TONLY { //LOR mut2tout_lor=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - learnreadorientationmodel_tonly(mut2tout_lor) + .map { samplename,vcfs,f1r2,stats -> tuple( samplename, + f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } + )} + learnreadorientationmodel_tonly(mut2tout_lor) - //Stats mut2tonly_mstats=mutect2_t_tonly.out.groupTuple() .map { samplename,vcfs,f1r2,stats -> tuple( samplename, @@ -154,11 +153,9 @@ workflow VC_TONLY { )} mergemut2stats_tonly(mut2tonly_mstats) - //Contamination contamination_tumoronly(pileup_paired_tout) - //Final TUMOR ONLY FILTER allmut2tonly=mutect2_t_tonly.out.groupTuple() .map { samplename,vcfs,f1r2,stats -> tuple( samplename, @@ -172,8 +169,7 @@ workflow VC_TONLY { mutect2filter_tonly(mut2tonly_filter) - - //vcf2maf Annotate + //Annotate mutect2filter_tonly.out .join(sample_sheet) .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 @@ -198,6 +194,7 @@ workflow VC_TONLY { octopus_tonly_comb1.join(sample_sheet) .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus + //Combine All Final //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly @@ -229,7 +226,7 @@ workflow SV_TONLY { //Survivor 
gunzip(manta_out).concat(svaba_out).groupTuple() - | survivor_sv |annotsv_survivor_tonly.out.ifEmpty("Empty SV input--No SV annotated") + | survivor_sv | annotsv_survivor_tonly.out.ifEmpty("Empty SV input--No SV annotated") } @@ -249,26 +246,16 @@ workflow CNVhuman_tonly { somaticcall_input main: - //FREEC - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | freec + //FREEC-Unpaired onlypu + bamwithsample | freec //Purple - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | amber - bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple(tname,tumor,tbai)} | cobalt - purplein=cobalt.out.join(amber.out) - purplein.join(somaticcall_input).view()| purple - - //Sequenza - //chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - //seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - // tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - //seqzin.combine(chrs) | seqz_sequenza_bychr - //seqz_sequenza_bychr.out.groupTuple() - // .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - // | sequenza + bamwithsample | amber_tonly + bamwithsample | cobalt_tonly + purplein=amber_tonly.out.join(cobalt_tonly.out) + purplein.join(somaticcall_input)| + map{t1,amber,cobalt,vc,vcf -> tuple(t1,amber,cobalt,vcf)} + | purple } From f6d6c7b609eca507498585ef15598ea8e11705fe Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 14 Nov 2023 20:45:17 -0500 Subject: [PATCH 08/11] fix: location change --- workflow/modules/copynumber.nf | 13 +++++-------- workflow/modules/trim_align.nf | 1 - 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf index dd91743..a30fa36 100644 --- a/workflow/modules/copynumber.nf +++ b/workflow/modules/copynumber.nf @@ -14,9 +14,6 @@ FREECSIGNIFICANCE = params.freec_significance FREECPLOT = params.freec_plot } -cobalt="/data/SCLC-BRAINMETS/cn/cobalt_v1.14.jar" 
-amber="/data/SCLC-BRAINMETS/cn/amber-3.9.jar" -purple="/data/SCLC-BRAINMETS/cn/purple_v3.8.2.jar" GERMLINEHET="/data/SCLC-BRAINMETS/cn/copy_number/GermlineHetPon.38.vcf.gz" GCPROFILE='/data/SCLC-BRAINMETS/cn/copy_number/GC_profile.1000bp.38.cnp' DIPLODREG='/data/SCLC-BRAINMETS/cn/copy_number/DiploidRegions.38.bed.gz' @@ -240,7 +237,7 @@ process amber_tonly { """ - java -Xmx32G -cp $amber com.hartwig.hmftools.amber.AmberApplication \ + java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \ -tumor ${tumorname} -tumor_bam ${tumor} \ -output_dir ${tumorname}_amber \ -threads $task.cpus \ @@ -276,7 +273,7 @@ process amber_tn { """ - java -Xmx32G -cp $amber com.hartwig.hmftools.amber.AmberApplication \ + java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \ -tumor ${tumorname} -tumor_bam ${tumor} \ -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_amber \ @@ -311,7 +308,7 @@ process cobalt_tonly { """ - java -jar -Xmx8G $cobalt \ + java -jar -Xmx8G cobalt.jar \ -tumor ${tumorname} -tumor_bam ${tumor} \ -output_dir ${tumorname}_cobalt \ -threads $task.cpus \ @@ -346,7 +343,7 @@ process cobalt_tn { """ - java -jar -Xmx8G $cobalt \ + java -jar -Xmx8G cobalt.jar \ -tumor ${tumorname} -tumor_bam ${tumorname} \ -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_cobalt \ @@ -382,7 +379,7 @@ process purple { """ - java -jar $purple \ + java -jar purple.jar \ -tumor ${tumorname} \ -amber ${amberin} \ -cobalt ${cobaltin} \ diff --git a/workflow/modules/trim_align.nf b/workflow/modules/trim_align.nf index 90bb83c..badac4c 100644 --- a/workflow/modules/trim_align.nf +++ b/workflow/modules/trim_align.nf @@ -55,7 +55,6 @@ process bwamem2 { tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") script: - //bwamem2, samblaster, samtools sort for marking duplicates; """ bwa-mem2 mem -M \ From 2ae88523ad205d90084af9aef1978123a78e72e2 Mon Sep 17 
00:00:00 2001 From: Darryl Nousome Date: Tue, 14 Nov 2023 23:01:43 -0500 Subject: [PATCH 09/11] feat: additional callers --- docker/logan_base/Dockerfile | 31 +++++++++++++++++++++++++------ docker/logan_base/meta.yml | 2 +- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index d6bc17d..2f85c23 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -165,12 +165,12 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3 ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH" # Install Octopus/v0.7.4 -RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \ - && tar -xvzf /opt2/v0.7.4.tar.gz \ - && rm /opt2/v0.7.4.tar.gz \ - && cd /opt2/octopus-0.7.4 \ - && cmake . -ENV PATH="/opt2/octopus-0.7.4/bin:$PATH" +#RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \ +# && tar -xvzf /opt2/v0.7.4.tar.gz \ +# && rm /opt2/v0.7.4.tar.gz \ +# && cd /opt2/octopus-0.7.4 \ +# && cmake . 
+#ENV PATH="/opt2/octopus-0.7.4/bin:$PATH" # Fastp From Opengene github @@ -202,6 +202,25 @@ RUN wget -O svaba_1.2.0 https://github.com/walaj/svaba/releases/download/v1.2.0/ ENV PATH="/opt2/svaba:$PATH" +# LOFREQ +RUN wget https://github.com/CSB5/lofreq/raw/master/dist/lofreq_star-2.1.5_linux-x86-64.tgz \ + && tar -xzf lofreq_star-2.1.5_linux-x86-64.tgz \ + && chmod a+x lofreq_star-2.1.5_linux-x86-64/bin/lofreq + +ENV PATH="/opt2/lofreq_star-2.1.5_linux-x86-64/bin/:$PATH" +# MUSE +RUN wget -O muse_2.0.4.tar.gz https://github.com/wwylab/MuSE/archive/refs/tags/v2.0.4.tar.gz \ + && tar -xzf v2.0.4.tar.gz \ + && cd MuSE-2.0.4 \ + && ./install_muse.sh \ + && mv MuSE /opt2/ \ + && chmod a+x /opt2/MuSE \ + && rm -R /opt2/MuSE-2.0.4 \ + && rm /opt2/muse_2.0.4.tar.gz + +ENV PATH="/opt2/MuSE:$PATH" + + # Add Dockerfile and argparse.bash script # and export environment variables diff --git a/docker/logan_base/meta.yml b/docker/logan_base/meta.yml index c2ad4cf..473f5d1 100644 --- a/docker/logan_base/meta.yml +++ b/docker/logan_base/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: dnousome image_name: ccbr_logan_base -version: v0.3.2 +version: v0.3.3 container: "$(dockerhub_namespace)/$(image_name):$(version)" From a26c05a134a084fefc7828cde465b69f2104be56 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 15 Nov 2023 09:15:54 -0500 Subject: [PATCH 10/11] fix: build error --- docker/logan_base/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 2f85c23..3ed4fb6 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -210,7 +210,7 @@ RUN wget https://github.com/CSB5/lofreq/raw/master/dist/lofreq_star-2.1.5_linux- ENV PATH="/opt2/lofreq_star-2.1.5_linux-x86-64/bin/:$PATH" # MUSE RUN wget -O muse_2.0.4.tar.gz https://github.com/wwylab/MuSE/archive/refs/tags/v2.0.4.tar.gz \ - && tar -xzf v2.0.4.tar.gz \ + && tar -xzf muse_2.0.4.tar.gz \ && cd MuSE-2.0.4 \ && 
./install_muse.sh \ && mv MuSE /opt2/ \ From cd48fd1d8d71348c09258f701ddd44c49f6cca4c Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 15 Nov 2023 16:42:32 -0500 Subject: [PATCH 11/11] feat: mergevcfsandgenotype and add docker --- docker/logan_base/Dockerfile | 12 +- nextflow.config | 2 +- workflow/modules/variant_calling.nf | 121 +++++++++++++------- workflow/modules/variant_calling_tonly.nf | 55 +++++++-- workflow/modules/workflows.nf | 132 +++++++++++++--------- workflow/modules/workflows_tonly.nf | 58 +++++----- 6 files changed, 241 insertions(+), 139 deletions(-) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 3ed4fb6..55832b5 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -52,15 +52,9 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4 && /opt2/gatk-4.3.0.0/gatk --list ENV PATH="/opt2/gatk-4.3.0.0:$PATH" -# Install last release of GATK3 (GATK/3.8-1) -# Only being used for the CombineVariants -# command that is not available in GATK4 -# Available via env variable: $GATK_JAR -# Requires Java8 or 1.8 -RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ - && tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ - && rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 -ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar" +# Use DISCVRSeq For CombineVariants Replacement +RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar +ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar" # Install dependencies needed to add a new repository over HTTPS RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ diff --git a/nextflow.config b/nextflow.config index 2f094e7..d868ce3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -229,7 +229,7 @@ profiles { } withName: 'octopus_tn|octopus_tonly' { container = 
'docker://dancooke/octopus:latest' - memory=70.GB + memory=72.GB time=24.h cpus=16 } diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index d87c33c..8354cc8 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -224,17 +224,17 @@ process mutect2filter { publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: - tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) + tuple val(sample), path(mutvcfs), path(stats), path(obs), + path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) output: - tuple val(sample), path("${sample}.mut2.marked.vcf.gz"), - path("${sample}.mut2.norm.vcf.gz"), + tuple val(sample), + path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"), + path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"), path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv") script: - //Include the stats and concat ${mutvcfs} -Oz -o ${sample}.concat.vcf.gz mut2in = mutvcfs.join(" -I ") - """ gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz gatk IndexFeatureFile -I ${sample}.concat.vcf.gz @@ -258,12 +258,13 @@ process mutect2filter { awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.mut2.norm.vcf |\ bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz + bcftools index -t ${sample}.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.mut2.marked.vcf.gz - touch ${sample}.mut2.norm.vcf.gz + touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi + touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv """ @@ -395,7 +396,7 @@ process octopus_tn { output: - tuple val(tumorname), + tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") script: @@ -506,8 
+507,11 @@ process combineVariants { output: tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") - + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") + script: vcfin = inputvcf.join(" -I ") @@ -518,13 +522,16 @@ process combineVariants { -D $GENOMEDICT \ -I $vcfin bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -533,7 +540,8 @@ process combineVariants { mkdir ${vc} touch ${vc}/${sample}.${vc}.marked.vcf.gz touch ${vc}/${sample}.${vc}.norm.vcf.gz - + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi """ } @@ -559,8 +567,7 @@ process bcftools_index_octopus { stub: """ - touch ${vcf} - touch ${vcf}.tbi + touch ${vcf} ${vcf}.tbi """ } @@ -574,7 +581,10 @@ process combineVariants_octopus { output: tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + path("${vc}/${sample}.${vc}.norm.vcf.gz"), + path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") script: vcfin = vcfs.join(" ") @@ -583,13 +593,16 @@ process combineVariants_octopus { mkdir ${vc} bcftools concat $vcfin -a -Oz -o 
${sample}.${vc}.temp.vcf.gz bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -598,16 +611,14 @@ process combineVariants_octopus { mkdir ${vc} touch ${vc}/${sample}.${vc}.marked.vcf.gz touch ${vc}/${sample}.${vc}.norm.vcf.gz + touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi + touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi """ } - - - - process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels label 'process_mid' @@ -617,7 +628,9 @@ process combineVariants_strelka { tuple val(sample), path(strelkasnvs), path(strelkaindels) output: - tuple val(sample), path("${sample}.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz") + tuple val(sample), + path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"), + path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi") script: @@ -628,29 +641,72 @@ process combineVariants_strelka { """ bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz - bcftools sort ${sample}.temp.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz + bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ + sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz + + bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o 
${sample}.strelka.vcf.gz bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz + bcftools index ${sample}.strelka.vcf.gz -t + bcftools index ${sample}.filtered.strelka.vcf.gz -t """ stub: """ - touch ${sample}.strelka.vcf.gz - touch ${sample}.filtered.strelka.vcf.gz + touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi + touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi """ } +process somaticcombine { + label 'process_mid' + publishDir(path: "${outdir}/vcfs/combined", mode: 'copy') + + input: + tuple val(tumorsample), val(normal), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_combined.vcf.gz"), + path("${tumorsample}_combined.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + println vcfin2 + + """ + java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined.vcf.gz \ + $vcfin2 + """ + + stub: + + """ + touch ${tumorsample}_combined.vcf.gz + touch ${tumorsample}_combined.vcf.gz.tbi + """ + +} process annotvep_tn { publishDir(path: "${outdir}/mafs/", mode: 'copy') input: tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf),path(vcfindex) output: path("paired/${vc}/${tumorsample}.maf") @@ -739,18 +795,3 @@ process combinemafs_tn { } - -/* -process combineVariants_allcallers { - - publishDir(path: "${outdir}/vcfs/", mode: 'copy') - - input: - tuple val(sample), path(inputvcf), val(vc) - - output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") - -} -*/ \ No newline at end of file diff --git
a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 33d5009..3da5360 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -186,8 +186,9 @@ process mutect2filter_tonly { input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: - tuple val(sample), path("${sample}.tonly.mut2.marked.vcf.gz"), - path("${sample}.tonly.mut2.norm.vcf.gz"), + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -217,13 +218,14 @@ process mutect2filter_tonly { awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' |\ bcftools view - -Oz -o ${sample}.tonly.mut2.norm.vcf.gz + bcftools index -t ${sample}.tonly.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.tonly.mut2.marked.vcf.gz - touch ${sample}.tonly.mut2.norm.vcf.gz + touch ${sample}.tonly.mut2.marked.vcf.gz ${sample}.tonly.mut2.marked.vcf.gz.tbi + touch ${sample}.tonly.mut2.norm.vcf.gz ${sample}.tonly.mut2.norm.vcf.gz.tbi touch ${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv """ } @@ -310,7 +312,7 @@ process octopus_tonly { output: tuple val(tumorname), - path("${tumorname}_${bed.simpleName}.octopus.vcf.gz") + path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") script: @@ -318,25 +320,62 @@ process octopus_tonly { octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP -t ${bed} \ $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.octopus.vcf.gz --threads $task.cpus + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus """ stub: """ - touch ${tumorname}_${bed.simpleName}.octopus.vcf.gz + touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz """ } +process somaticcombine_tonly { + label 
'process_mid' + publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') + + input: + tuple val(tumorsample), + val(callers), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), + path("${tumorsample}_combined_tonly.vcf.gz"), + path("${tumorsample}_combined_tonly.vcf.gz.tbi") + + script: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + println vcfin2 + + """ + java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \ + -R $GENOMEREF \ + --genotypeMergeOption PRIORITIZE \ + --priority_list mutect2,octopus,vardict,varscan \ + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + -O ${tumorsample}_combined_tonly.vcf.gz \ + $vcfin2 + """ + + stub: + """ + touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi + """ + +} + process annotvep_tonly { publishDir("${outdir}/mafs", mode: "copy") input: tuple val(tumorsample), - val(vc), path(tumorvcf) + val(vc), path(tumorvcf), + path(vcfindex) output: diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index b4b8646..59343cc 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -27,7 +27,8 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; - combinemafs_tn} from './variant_calling.nf' + annotvep_tn as annotvep_tn_combined; + combinemafs_tn; somaticcombine} from './variant_calling.nf' include {mutect2_t_tonly; mutect2filter_tonly; varscan_tonly; vardict_tonly; octopus_tonly; @@ -36,7 +37,8 @@ include {mutect2_t_tonly; mutect2filter_tonly; mergemut2stats_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as
annotvep_tonly_octopus; - combinemafs_tonly} from './variant_calling_tonly.nf' + annotvep_tonly as annotvep_tonly_combined; + combinemafs_tonly;somaticcombine_tonly} from './variant_calling_tonly.nf' include {svaba_somatic; manta_somatic; survivor_sv; gunzip; @@ -199,8 +201,6 @@ workflow VC { .join(mergemut2stats.out) .join(learnreadorientationmodel.out) .join(contamination_paired.out) - mutect2filter(mut2tn_filter) - //Tumor Only Calling bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} @@ -235,87 +235,109 @@ workflow VC { .join(learnreadorientationmodel_tonly.out) .join(contamination_tumoronly.out) - mutect2filter_tonly(mut2tonly_filter) - mutect2filter.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,normal,"mutect2",finalvcf)} | annotvep_tn_mut2 - - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats,normal -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 + + //Annotation) + mutect2_in=mutect2filter(mut2tn_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + annotvep_tn_mut2(mutect2_in) - //Strelka + + mutect2_in_tonly=mutect2filter_tonly(mut2tonly_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} + annotvep_tonly_mut2(mutect2_in_tonly) + + //Strelka TN strelka_tn(bambyinterval) strelkaout=strelka_tn.out.groupTuple() .map { samplename,vcfs,indels -> tuple( samplename, vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() }, indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } )} - combineVariants_strelka(strelkaout) - combineVariants_strelka.out.join(sample_sheet) - .map{tumor,markedvcf,finalvcf,normal -> 
tuple(tumor,normal,"strelka",finalvcf)} | annotvep_tn_strelka + strelka_in=combineVariants_strelka(strelkaout) | join(sample_sheet) + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + annotvep_tn_strelka(strelka_in) //Vardict vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict - vardict_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"vardict",normvcf)} | annotvep_tn_vardict + vardict_in=vardict_comb.join(sample_sheet) + .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + annotvep_tn_vardict(vardict_in) //VarDict_tonly vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> tuple(tumorname,tumorbam,tumorbai,bed)} - vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} |combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict - + vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly + + vardict_in_tonly=combineVariants_vardict_tonly.out.join(sample_sheet) + .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) + //VarScan TN - varscan_in=bambyinterval.join(contamination_paired.out) - varscan_comb=varscan_tn(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan - varscan_comb.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"varscan",normvcf)} | annotvep_tn_varscan - - //VarScan_TOnly - varscan_tonly_comb=varscan_in.map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - 
tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly - varscan_tonly_comb1=varscan_tonly_comb.groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + varscan_in=bambyinterval.join(contamination_paired.out) + | varscan_tn | groupTuple() |map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + annotvep_tn_varscan(varscan_in) - varscan_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf,normal ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan - + //VarScan_TOnly + varscan_in_tonly=bambyinterval.join(contamination_paired.out) + | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly + | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) + //Lofreq TN - lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} - | combineVariants_lofreq | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"lofreq",normvcf)} - | annotvep_tn_lofreq + lofreq_in=lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} + | combineVariants_lofreq | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} + annotvep_tn_lofreq(lofreq_in) //MuSE TN - muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} - | combineVariants_muse | join(sample_sheet)| map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"muse",normvcf)} - | annotvep_tn_muse + 
muse_in=muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} + | combineVariants_muse | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} + annotvep_tn_muse(muse_in) //Octopus_TN - octopus_annotin=octopus_tn(bambyinterval) | bcftools_index_octopus - | groupTuple() |map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus")} - | combineVariants_octopus | join(sample_sheet)|map{tumor,marked,normvcf,normal ->tuple(tumor,normal,"octopus",normvcf)} - annotvep_tn_octopus(octopus_annotin) + octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus + | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} + | combineVariants_octopus | map{samplename,marked,markedindex,normvcf,normindex -> + tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} + annotvep_tn_octopus(octopus_in) - //Octopus_TOnly - octopus_tonly_out=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> + octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly - octopus_tonly_comb=octopus_tonly_out.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex,"octopus_tonly")} - | combineVariants_octopus_tonly - - octopus_tonly_comb.join(sample_sheet) | - map{tumor,marked,normvcf,normal ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus + | groupTuple() + | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} + | combineVariants_octopus_tonly + | join(sample_sheet) | + map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) //Combine All Variants Using VCF 
and Then Reannotate - //annotvep_tn_mut2.out.concat(annotvep_tn_strelka.out).concat(annotvep_tn_vardict.out).concat(annotvep_tn_varscan.out) | combinemafs_tn - //annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly + mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) + | concat(vardict_in) |concat(varscan_in)|groupTuple() + | somaticcombine + | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} + | annotvep_tn_combined + + mutect2_in_tonly|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly) + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined + //Implement PCGR Annotator/CivIC Next emit: - somaticcall_input=octopus_annotin - + somaticcall_input=octopus_in + } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 5143cbe..af05fe1 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -32,7 +32,8 @@ include {mutect2_t_tonly; mutect2filter_tonly; pileup_paired_tonly; mergemut2stats_tonly; annotvep_tonly as annotvep_tonly_varscan; annotvep_tonly as annotvep_tonly_vardict; annotvep_tonly as annotvep_tonly_mut2; annotvep_tonly as annotvep_tonly_octopus; - combinemafs_tonly} from './variant_calling_tonly.nf' + annotvep_tonly as annotvep_tonly_combined; + combinemafs_tonly; somaticcombine_tonly} from './variant_calling_tonly.nf' include {manta_tonly; svaba_tonly; survivor_sv; gunzip; annotsv_tonly as annotsv_manta_tonly; annotsv_tonly as annotsv_svaba_tonly; @@ -167,36 +168,41 @@ workflow VC_TONLY { .join(learnreadorientationmodel_tonly.out) .join(contamination_tumoronly.out) - mutect2filter_tonly(mut2tonly_filter) - - //Annotate - mutect2filter_tonly.out - .join(sample_sheet) - .map{tumor,markedvcf,finalvcf,stats -> tuple(tumor,"mutect2",finalvcf)} | annotvep_tonly_mut2 + 
mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} + annotvep_tonly_mut2(mutect2_tonly_in) - //VarDict_tonly - vardict_tonly(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly - combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"vardict_tonly",normvcf)} | annotvep_tonly_vardict + //VarDict + vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} + | combineVariants_vardict_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + annotvep_tonly_vardict(vardict_in_tonly) //VarScan_tonly - varscan_in=bambyinterval.join(contamination_tumoronly.out) - varscan_tonly_comb=varscan_tonly(varscan_in).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan_tonly - - varscan_tonly_comb.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"varscan_tonly",normvcf)} | annotvep_tonly_varscan + varscan_in_tonly=bambyinterval.join(contamination_tumoronly.out) + | varscan_tonly | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan")} + | combineVariants_varscan_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"varscan_tonly",normvcf,normindex)} + annotvep_tonly_varscan(varscan_in_tonly) //Octopus_tonly - octopus_tonly_comb=bambyinterval.map{tumor,bam,bai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus - octopus_tonly_comb1=octopus_tonly_comb.groupTuple().map{tumor,vcf,vcfindex-> tuple(tumor,vcf,vcfindex, "octopus_tonly")} | combineVariants_octopus - - octopus_tonly_comb1.join(sample_sheet) - .map{tumor,marked,normvcf ->tuple(tumor,"octopus_tonly",normvcf)} | annotvep_tonly_octopus - - - //Combine All Final - 
//annotvep_tonly_mut2.out.concat(annotvep_tonly_vardict.out).concat(annotvep_tonly_varscan.out) | combinemafs_tonly + octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus + | groupTuple() + | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} + ,vcfindex, "octopus_tonly")} + | combineVariants_octopus | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + annotvep_tonly_octopus(octopus_in_tonly) + + + mutect2_tonly_in|concat(octopus_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly) + | somaticcombine_tonly + | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} + | annotvep_tonly_combined emit: