Skip to content

Commit

Permalink
Also handle variants that are in exons but adjacent to introns
Browse files: browse the repository at this point in the history
  • Loading branch information
b0d0nne11 committed Jan 24, 2025
1 parent 6cc7a79 commit b311115
Show file tree
Hide file tree
Showing 5 changed files with 315 additions and 64 deletions.
48 changes: 36 additions & 12 deletions src/hgvs/utils/altseqbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,42 @@ def _get_variant_region(self):
and self._var_c.posedit.pos.end.datum == Datum.CDS_END
):
result = self.WHOLE_GENE
elif self._var_c.posedit.edit.type == "ins" and self._var_c.posedit.pos.start.offset == -1 and self._var_c.posedit.pos.end.offset == 0:
# ins at intron-exon boundary
result = self.INTRON if global_config.mapping.ref_at_boundary_is_intronic else self.EXON
elif self._var_c.posedit.edit.type == "ins" and self._var_c.posedit.pos.start.offset == 0 and self._var_c.posedit.pos.end.offset == 1:
# ins at exon-intron boundary
result = self.INTRON if global_config.mapping.ref_at_boundary_is_intronic else self.EXON
elif self._var_c.posedit.edit.type == "dup" and self._var_c.posedit.pos.end.offset == -1:
# dup at intron-exon boundary
result = self.INTRON if global_config.mapping.ref_at_boundary_is_intronic else self.EXON
elif self._var_c.posedit.edit.type == "dup" and self._var_c.posedit.pos.start.offset == 1:
# dup at exon-intron boundary
result = self.INTRON if global_config.mapping.ref_at_boundary_is_intronic else self.EXON
elif (
global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "dup"
and self._var_c.posedit.pos.start.base in self._transcript_data.exon_start_positions
):
result = self.INTRON
elif (
global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "dup"
and self._var_c.posedit.pos.end.base in self._transcript_data.exon_end_positions
):
result = self.INTRON
elif (
not global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "ins"
and self._var_c.posedit.pos.start.offset == -1 and self._var_c.posedit.pos.end.offset == 0
):
result = self.EXON
elif (
not global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "ins"
and self._var_c.posedit.pos.start.offset == 0 and self._var_c.posedit.pos.end.offset == 1
):
result = self.EXON
elif (
not global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "dup"
and self._var_c.posedit.pos.end.offset == -1
):
result = self.EXON
elif (
not global_config.mapping.ref_at_boundary_is_intronic
and self._var_c.posedit.edit.type == "dup"
and self._var_c.posedit.pos.start.offset == 1
):
result = self.EXON
elif self._var_c.posedit.pos.start.offset != 0 or self._var_c.posedit.pos.end.offset != 0:
# leave out anything else intronic for now
result = self.INTRON
Expand Down
8 changes: 8 additions & 0 deletions src/hgvs/utils/reftranscriptdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,16 @@ def __init__(self, hdp, tx_ac, pro_ac):
# TODO: drop get_acs_for_protein_seq; use a known mapping or digest instead (i.e., without pro_ac inference)
pro_ac = hdp.get_pro_ac_for_tx_ac(tx_ac) or hdp.get_acs_for_protein_seq(protein_seq)[0]

exon_start_positions = [-tx_info["cds_start_i"]]
exon_end_positions = [exon_start_positions[0] + tx_info["lengths"][0]]
for exon_length in tx_info["lengths"][1:]:
exon_start_positions.append(exon_end_positions[-1] + 1)
exon_end_positions.append(exon_end_positions[-1] + exon_length)

self.transcript_sequence = tx_seq
self.aa_sequence = protein_seq
self.cds_start = cds_start
self.cds_stop = cds_stop
self.protein_accession = pro_ac
self.exon_start_positions = exon_start_positions
self.exon_end_positions = exon_end_positions
Binary file modified tests/data/cache-py3.hdp
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/data/gcp/real.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ID00048 NC_000010.10:g.89692922T>C NM_000314.4:c.406T>C NP_000305.3:p.(Cys136Arg
ID00049 NC_000010.10:g.89692921dupA NM_000314.4:c.405dupA NP_000305.3:p.(Cys136Metfs*44)
ID00050 NC_000010.10:g.89692923_89692939delGTGCATATTTATTACAT NM_000314.4:c.407_423delGTGCATATTTATTACAT NP_000305.3:p.(Cys136Serfs*38)
ID00051 NC_000010.10:g.89712015C>A NM_000314.4:c.633C>A NP_000305.3:p.(Cys211*)
ID00052 NC_000010.10:g.89685314dupT NM_000314.4:c.209dupT NP_000305.3:p.(Cys71Leufs*3)
ID00052 NC_000010.10:g.89685314dupT NM_000314.4:c.209dupT NP_000305.3:p.?
ID00053 NC_000010.10:g.89711893C>T NM_000314.4:c.511C>T NP_000305.3:p.(Gln171*)
ID00054 NC_000010.10:g.89692963dupA NM_000314.4:c.447dupA NP_000305.3:p.(Glu150Argfs*30)
ID00055 NC_000010.10:g.89685315G>A NM_000314.4:c.209+1G>A NP_000305.3:p.?
Expand Down
Loading

0 comments on commit b311115

Please sign in to comment.