diff --git a/bin/strelka_convert.py b/bin/strelka_convert.py index e0e6a34..4b5d4fe 100755 --- a/bin/strelka_convert.py +++ b/bin/strelka_convert.py @@ -5,6 +5,7 @@ import gzip import os +##Adapted from https://github.com/bcbio/bcbio-nextgen/blob/72f42706faa5cfe4f0680119bf148e0bdf2b78ba/bcbio/variation/strelka2.py#L30 def _tumor_normal_genotypes(ref, alt, info): """Retrieve standard 0/0, 0/1, 1/1 style genotypes from INFO field. @@ -76,25 +77,30 @@ def _af_annotate_and_filter(in_file,out_file): ID='AF', Number='1',Type='Float', Description='Allele frequency, as calculated in bcbio: AD/DP (germline), U/DP (somatic snps), TIR/DPI (somatic indels)' )) + vcf.add_format_to_header(dict( + ID='AD', Number='.',Type='Integer', + Description='Depth of reads supporting alleles' + )) writer = cyvcf2.Writer(out_file, vcf) for rec in vcf: - #print(rec) if rec.is_snp: # snps? alt_counts = rec.format(rec.ALT[0] +'U')[:,0] # {ALT}U=tier1_depth,tier2_depth + ref_counts = rec.format(rec.REF + 'U')[:,0] else: # indels alt_counts = rec.format('TIR')[:,0] # TIR=tier1_depth,tier2_depth + ref_counts = rec.format('TAR')[:,0] dp = rec.format('DP')[:,0] if dp is not None : with np.errstate(divide='ignore', invalid='ignore'): # ignore division by zero and put AF=.0 #alt_n = alt_counts_n[0]/DP_n #alt_t = alt_counts_t[0]/DP_t + ad = [ref_counts[0],alt_counts[0],ref_counts[1],alt_counts[1]] af = np.true_divide(alt_counts, dp) rec.set_format('AF',np.round(af,5)) + rec.set_format('AD',np.array(ad)) writer.write_record(rec) writer.close() -#def is_gzipped(path): -# return path.endswith(".gz") def _add_gt(in_file): ##Set genotypes now diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index e258a8d..c23215f 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -632,8 +632,8 @@ process combineVariants { script: vcfin = inputvcf.join(" -I ") //Create Tumor Normal here - samplist=sample.tokenize('_vs_') - if(samplist.size>1){ + samplist=sample.split('_vs_') + if(samplist.size()>1){ samporder = samplist.join(",") }else{ samporder = sample @@ -647,16 +647,18 @@ process combineVariants { -I $vcfin bcftools view ${sample}.${vc}.markedtemp.vcf.gz -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools index -t ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf bcftools view ${sample}.${vc}.temp.vcf -f PASS -s $samporder -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t mv ${sample}.${vc}.marked.vcf.gz ${vc} mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc} - bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ stub: @@ -690,8 +692,10 @@ process combineVariants_alternative { script: vcfin = vcfs.join(" ") - samplist=sample.tokenize('_vs_') - if(samplist.size>1){ + samplist=sample.split('_vs_') + if (vc.contains("lofreq")) { + samporder = samplist[0] + }else if(samplist.size()>1){ samporder = samplist.join(",") }else{ samporder = sample @@ -793,8 +797,8 @@ process combineVariants_strelka { vcfin = strelkasnvs.join(" ") indelsin = strelkaindels.join(" ") - samplist=sample.tokenize('_vs_') - if(samplist.size>1){ + samplist=sample.split('_vs_') + if(samplist.size()>1){ samporder = samplist.join(",") }else{ samporder = sample @@ -849,7 +853,7 @@ process convert_strelka { stub: """ - touch ${tumor}.filtered.strelka-fixed.vcf.gz ${tumor}.filtered.strelka-fixed.vcf.gz.tbi + touch ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi """ }