Skip to content

Commit

Permalink
Add battenberg-smchet CNV format.
Browse files Browse the repository at this point in the history
  • Loading branch information
jwintersinger committed Nov 4, 2015
1 parent 73783c3 commit bb40b88
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions parser/parse_cnvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ class BattenbergParser(CnvParser):
# Store the parser's inputs and the column offset that parse() applies.
#   bb_filename -- kept as self._bb_filename.
#   cellularity -- kept as self._cellularity; parse() multiplies each
#                  segment's cellular prevalence by it.
def __init__(self, bb_filename, cellularity):
self._bb_filename = bb_filename
self._cellularity = cellularity
# Added to every column index that parse() reads. 0 is the default layout;
# BattenbergSmchetParser overrides it with -1 because its input has the
# fields shifted by one column.
self._field_offset = 0

def _compute_cn(self, cnv1, cnv2):
'''
Expand All @@ -102,17 +104,17 @@ def parse(self):
header = bbf.next()
for line in bbf:
fields = line.strip().split()
chrom = fields[1].lower()
start = int(fields[2])
end = int(fields[3])
pval = float(fields[5])
chrom = fields[1 + self._field_offset].lower()
start = int(fields[2 + self._field_offset])
end = int(fields[3 + self._field_offset])
pval = float(fields[5 + self._field_offset])

cnv1 = {}
cnv1['start'] = start
cnv1['end'] = end
cnv1['major_cn'] = int(fields[8])
cnv1['minor_cn'] = int(fields[9])
cnv1['cellular_prevalence'] = float(fields[10]) * self._cellularity
cnv1['major_cn'] = int(fields[8 + self._field_offset])
cnv1['minor_cn'] = int(fields[9 + self._field_offset])
cnv1['cellular_prevalence'] = float(fields[10 + self._field_offset]) * self._cellularity

cnv2 = None
# Stefan's comment on p values: The p-values correspond "to whether a
Expand All @@ -127,9 +129,9 @@ def parse(self):
cnv2 = {}
cnv2['start'] = start
cnv2['end'] = end
cnv2['major_cn'] = int(fields[11])
cnv2['minor_cn'] = int(fields[12])
cnv2['cellular_prevalence'] = float(fields[13]) * self._cellularity
cnv2['major_cn'] = int(fields[11 + self._field_offset])
cnv2['minor_cn'] = int(fields[12 + self._field_offset])
cnv2['cellular_prevalence'] = float(fields[13 + self._field_offset]) * self._cellularity
else:
cnv1['cellular_prevalence'] = self._cellularity

Expand All @@ -138,6 +140,12 @@ def parse(self):
cn_regions[chrom].append(cnv2)
return cn_regions

# Parser for the SMC-Het flavour of Battenberg output (selected on the command
# line with the 'battenberg-smchet' format). It defines only __init__, so all
# parsing is inherited from BattenbergParser; the sole difference is the
# column offset applied to every field index.
class BattenbergSmchetParser(BattenbergParser):
def __init__(self, bb_filename, cellularity):
# Run the base initialiser first: it sets _field_offset to 0, so the
# override below has to come after this call to take effect.
super(BattenbergSmchetParser, self).__init__(bb_filename, cellularity)
# SMC-Het Battenberg files lack the initial index column, so every field
# sits one position to the left of where the base parser expects it.
self._field_offset = -1

def restricted_float(x):
x = float(x)
if x < 0.0 or x > 1.0:
Expand All @@ -149,7 +157,7 @@ def main():
description='Create CNV input file for parser from Battenberg or TITAN data',
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument('-f', '--cnv-format', dest='input_type', required=True, choices=('battenberg', 'titan'),
parser.add_argument('-f', '--cnv-format', dest='input_type', required=True, choices=('battenberg', 'battenberg-smchet', 'titan'),
help='Type of CNV input')
parser.add_argument('-c', '--cellularity', dest='cellularity', type=restricted_float, required=True,
help='Fraction of sample that is cancerous rather than somatic. Used only for estimating CNV confidence -- if no CNVs, need not specify argument.')
Expand All @@ -160,6 +168,8 @@ def main():

if args.input_type == 'battenberg':
parser = BattenbergParser(args.cnv_file, args.cellularity)
elif args.input_type == 'battenberg-smchet':
parser = BattenbergSmchetParser(args.cnv_file, args.cellularity)
elif args.input_type == 'titan':
parser = TitanParser(args.cnv_file, args.cellularity)
else:
Expand Down

0 comments on commit bb40b88

Please sign in to comment.