diff --git a/clair/call_var.py b/clair/call_var.py index 8b8a7a7..6ad9794 100644 --- a/clair/call_var.py +++ b/clair/call_var.py @@ -20,7 +20,7 @@ ) from clair.task.genotype import Genotype, genotype_string_from, genotype_enum_from, genotype_enum_for_task from clair.task.variant_length import VariantLength -from shared.utils import IUPAC_base_to_num_dict as BASE2NUM, IUPAC_base_to_ACGT_base_dict as BASE2ACGT +from shared.utils import IUPAC_base_to_num_dict as BASE2NUM, IUPAC_base_to_ACGT_base_dict as BASE2ACGT, BASIC_BASES import shared.param as param @@ -943,6 +943,9 @@ def batch_output(mini_batch, batch_Y, output_config, output_utilities): chromosome, position, reference_sequence = chr_pos_seq position = int(position) + if reference_sequence[tensor_position_center] not in BASIC_BASES: + continue + # read depth read_depth = sum( x[tensor_position_center, :, Channel.delete] + x[tensor_position_center, :, Channel.reference] diff --git a/shared/utils.py b/shared/utils.py index 61dacaf..d68256e 100644 --- a/shared/utils.py +++ b/shared/utils.py @@ -26,6 +26,7 @@ (0, 1, 2, 3, 3, 0, 1, 1, 0, 2, 0, 1, 0, 0, 0) )) +BASIC_BASES = set("ACGTU") def is_file_exists(file_name, suffix=""): if not isinstance(file_name, str) or not isinstance(suffix, str):