Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
avoid direct use of coordinates mapping in has_reading_frames
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Sep 21, 2023
1 parent f950e4e commit d1ee5c1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 22 deletions.
26 changes: 7 additions & 19 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,18 +505,11 @@ def translate(seq, frame = 0, to_stop = False):


def has_reading_frames(
alignment, sequence, reference, is_small,
aligned_sequence, is_small,
expected, error_bar, reverse = False
):
"""
Check for presence of small reading frames
"""

coordinates_mapping = coords.map_positions(alignment[0], alignment[1].seq)
reverse_coordinates_mapping = coords.map_positions(alignment[1], alignment[0].seq)

reference_aligned_mapping = coords.map_nonaligned_to_aligned_positions(reference, alignment[0].seq)
query_aligned_mapping = coords.map_nonaligned_to_aligned_positions(sequence, alignment[1].seq)
sequence = aligned_sequence.this
reference = aligned_sequence.reference

errors = []
matches = []
Expand Down Expand Up @@ -544,13 +537,8 @@ def find_closest(aminoacids, start, direction, target):
return 0

def find_candidate_positions(e, q_start, q_end):
expected_nucleotides = e.nucleotides
expected_aminoacids = e.aminoacids
expected_protein = expected_aminoacids.strip("*")
q_start = coordinates_mapping[e.start]
q_end = coordinates_mapping[e.end - 1 if e.end == len(coordinates_mapping) else e.end]
got_nucleotides = sequence.seq[q_start:q_end]
got_aminoacids = translate(got_nucleotides)
q_start_a = q_start // 3
q_end_a = q_end // 3
n = len(sequence.seq) - 1
Expand All @@ -576,8 +564,8 @@ def find_candidate_positions(e, q_start, q_end):
"forward", dist, got_protein, got_aminoacids)

def find_real_correspondence(e):
q_start = coordinates_mapping[e.start]
q_end = coordinates_mapping[e.end - 1 if e.end == len(coordinates_mapping) else e.end]
q_start = aligned_sequence.map_index(e.start)
q_end = aligned_sequence.map_index(e.end)
candidates = find_candidate_positions(e, q_start, q_end)
return min(candidates, key=lambda x: x.distance)

Expand Down Expand Up @@ -895,12 +883,12 @@ def intact( working_dir,
alignment = aligned_sequence.get_alignment()

sequence_orfs, orf_errors = has_reading_frames(
alignment, sequence, reference, False,
aligned_sequence, False,
forward_orfs, error_bar)
sequence_errors.extend(orf_errors)

sequence_small_orfs, small_orf_errors = has_reading_frames(
alignment, sequence, reference, True,
aligned_sequence, True,
small_orfs, error_bar, reverse = False)
if include_small_orfs:
sequence_errors.extend(small_orf_errors)
Expand Down
13 changes: 10 additions & 3 deletions util/aligned_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ def aligned_this(self):
return self.get_alignment()[1]


def map_index(self, index):
def get_coordinates_mapping(self):
if not self.coordinates_mapping:
self.coordinates_mapping = coords.map_positions(self.aligned_reference(), self.aligned_this())

return self.coordinates_mapping


def map_index(self, index):
if isinstance(index, ReferenceIndex):
index = index.value

if not isinstance(index, int):
raise TypeError(f"Expected integer as index", index)

return self.coordinates_mapping[index]
return self.get_coordinates_mapping()[index]


def index(self, index):
Expand All @@ -59,7 +66,7 @@ def slice(self, first, last):
newthis = self.this[first:(last + 1)]
newreference = self.reference[self.map_index(first):(self.map_index(last) + 1)]
# TODO: calculate new "coordinates_mapping" and new "alignment" from these indexes.
return Sequence(this=newthis, reference=newreference)
return AlignedSequence(this=newthis, reference=newreference)


def reverse(self):
Expand Down

0 comments on commit d1ee5c1

Please sign in to comment.