Skip to content

Commit

Permalink
fix: strelka convert gt
Browse files Browse the repository at this point in the history
  • Loading branch information
dnousome committed Jul 22, 2024
1 parent 12b3b9c commit 73f8602
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
12 changes: 9 additions & 3 deletions bin/strelka_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import gzip
import os

##Adapted from https://github.com/bcbio/bcbio-nextgen/blob/72f42706faa5cfe4f0680119bf148e0bdf2b78ba/bcbio/variation/strelka2.py#L30
def _tumor_normal_genotypes(ref, alt, info):
"""Retrieve standard 0/0, 0/1, 1/1 style genotypes from INFO field.
Expand Down Expand Up @@ -76,25 +77,30 @@ def _af_annotate_and_filter(in_file,out_file):
ID='AF', Number='1',Type='Float',
Description='Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), TIR/DPI (somatic indels)'
))
vcf.add_format_to_header(dict(
ID='AD', Number='.',Type='Integer',
Description='Depth of reads supporting alleles'
))
writer = cyvcf2.Writer(out_file, vcf)
for rec in vcf:
#print(rec)
if rec.is_snp: # snps?
alt_counts = rec.format(rec.ALT[0] +'U')[:,0] # {ALT}U=tier1_depth,tier2_depth
ref_counts = rec.format(rec.REF + 'U')[:,0]
else: # indels
alt_counts = rec.format('TIR')[:,0] # TIR=tier1_depth,tier2_depth
ref_counts = rec.format('TAR')[:,0]
dp = rec.format('DP')[:,0]
if dp is not None :
with np.errstate(divide='ignore', invalid='ignore'): # ignore division by zero and put AF=.0
#alt_n = alt_counts_n[0]/DP_n
#alt_t = alt_counts_t[0]/DP_t
ad = [ref_counts[0],alt_counts[0],ref_counts[1],alt_counts[1]]
af = np.true_divide(alt_counts, dp)
rec.set_format('AF',np.round(af,5))
rec.set_format('AD',np.array(ad))
writer.write_record(rec)
writer.close()

#def is_gzipped(path):
# return path.endswith(".gz")

def _add_gt(in_file):
##Set genotypes now
Expand Down
20 changes: 12 additions & 8 deletions modules/local/variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,8 @@ process combineVariants {
script:
vcfin = inputvcf.join(" -I ")
//Create Tumor Normal here
samplist=sample.tokenize('_vs_')
if(samplist.size>1){
samplist=sample.split('_vs_')
if(samplist.size()>1){
samporder = samplist.join(",")
}else{
samporder = sample
Expand All @@ -647,16 +647,18 @@ process combineVariants {
-I $vcfin
bcftools view ${sample}.${vc}.markedtemp.vcf.gz -s $samporder -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools index -t ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
bcftools view ${sample}.${vc}.temp.vcf -f PASS -s $samporder -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
mv ${sample}.${vc}.marked.vcf.gz ${vc}
mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc}
bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
"""

stub:
Expand Down Expand Up @@ -690,8 +692,10 @@ process combineVariants_alternative {

script:
vcfin = vcfs.join(" ")
samplist=sample.tokenize('_vs_')
if(samplist.size>1){
samplist=sample.split('_vs_')
if (vc.contains("lofreq")) {
samporder = samplist[0]
}else if(samplist.size()>1){
samporder = samplist.join(",")
}else{
samporder = sample
Expand Down Expand Up @@ -793,8 +797,8 @@ process combineVariants_strelka {

vcfin = strelkasnvs.join(" ")
indelsin = strelkaindels.join(" ")
samplist=sample.tokenize('_vs_')
if(samplist.size>1){
samplist=sample.split('_vs_')
if(samplist.size()>1){
samporder = samplist.join(",")
}else{
samporder = sample
Expand Down Expand Up @@ -849,7 +853,7 @@ process convert_strelka {
stub:

"""
touch ${tumor}.filtered.strelka-fixed.vcf.gz ${tumor}.filtered.strelka-fixed.vcf.gz.tbi
touch ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz ${tumor}_vs_${normal}.filtered.strelka-fixed.vcf.gz.tbi
"""

}
Expand Down

0 comments on commit 73f8602

Please sign in to comment.