diff --git a/stranger/utils.py b/stranger/utils.py index de3fad1..c1871aa 100644 --- a/stranger/utils.py +++ b/stranger/utils.py @@ -174,7 +174,17 @@ def get_repeat_info(variant_info, repeat_info): repeat_id = variant_info['info_dict'].get('REPID') if not repeat_id: - repeat_id = variant_info['info_dict'].get('TRID').split('_')[1] + + for key in ['TRID', 'ID']: + value = variant_info['info_dict'].get(key) + if value is not None: + # Some IDs may have _, some may not + if '_' in value: + repeat_id = value.split('_')[1] + else: + repeat_id = value + break + if not repeat_id in repeat_info: LOG.warning("No info for repeat id %s", repeat_id) return None @@ -223,7 +233,17 @@ def get_trgt_repeat_res(variant_info, repeat_info): repeat_id = variant_info['info_dict'].get('REPID') if not repeat_id: - repeat_id = variant_info['info_dict'].get('TRID').split('_')[1] + + for key in ['TRID', 'ID']: + value = variant_info['info_dict'].get(key) + if value is not None: + # Some IDs may have _, some may not + if '_' in value: + repeat_id = value.split('_')[1] + else: + repeat_id = value + break + if not repeat_id in repeat_info: LOG.warning("No info for repeat id %s", repeat_id) return None @@ -236,6 +256,10 @@ def get_trgt_repeat_res(variant_info, repeat_info): for allele in mc.split(","): mcs = allele.split('_') # GT would have the index of the MC in the ALT field list if we wanted to be specific... + + # What should we do if MC is . ? + if allele == ".": + continue if len(mcs) > 1: pathologic_mcs = repeat_info[repeat_id].get('pathologic_struc', range(len(mcs))) @@ -365,8 +389,8 @@ def decompose_var(variant_info): for index, alt in enumerate(variant_info['alts']): for individual_index, format_dict in enumerate(variant_info['format_dicts']): - gts = format_dict["GT"].split("/") - + gts = re.split(r'[\/|]', format_dict["GT"]) + delimiter = "/" if "/" in format_dict["GT"] else "|" updated_fields = [] for gt_component, decomposed_field in enumerate(gts): if decomposed_field == "0": @@ -386,7 +410,7 @@ def decompose_var(variant_info): # unclear component updated_fields.append(".") - result_variants[index]['format_dicts'][individual_index]['GT'] = "/".join(updated_fields) + result_variants[index]['format_dicts'][individual_index]['GT'] = delimiter.join(updated_fields) for field, individual_value in format_dict.items(): if field in ["GT"]: