Skip to content

Commit

Permalink
If ungapped align is softclipped, use ksw_extend
Browse files Browse the repository at this point in the history
Closes #357
  • Loading branch information
marcelm committed Dec 15, 2023
1 parent f0e8333 commit 82611d8
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 8 deletions.
49 changes: 41 additions & 8 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,48 @@ inline Alignment extend_seed(
AlignmentInfo info;
int result_ref_start;
bool gapped = true;
if (projected_ref_end - projected_ref_start == query.size() && consistent_nam) {
std::string_view ref_segm_ham = ref.substr(projected_ref_start, query.size());
auto hamming_dist = hamming_distance(query, ref_segm_ham);

if (hamming_dist >= 0 && (((float) hamming_dist / query.size()) < 0.05) ) { //Hamming distance worked fine, no need to ksw align
info = hamming_align(query, ref_segm_ham, aligner.parameters.match, aligner.parameters.mismatch, aligner.parameters.end_bonus);
result_ref_start = projected_ref_start + info.ref_start;
gapped = false;
if (consistent_nam && projected_ref_end - projected_ref_start == query.size()) {
const std::string_view projected_ref = ref.substr(projected_ref_start, query.size());
info = hamming_align(query, projected_ref, aligner.parameters.match, aligner.parameters.mismatch, aligner.parameters.end_bonus);
if (info.query_end < query.length()) {
// Right end is soft clipped, do gapped alignment on it
const std::string query_right = query.substr(info.query_end);
const std::string_view ref_right = projected_ref.substr(info.ref_end);
auto right = aligner.ksw_extend(query_right, ref_right, false);

info.query_end += right.query_end;
info.ref_end += right.ref_end;
info.edit_distance += right.edit_distance;
info.sw_score += right.sw_score;
assert(!info.cigar.empty());
info.cigar.pop_oplen();

info.cigar += right.cigar;
}

if (info.query_start > 0) {
// Left end is soft clipped, do gapped alignment on it
std::string query_left = query.substr(0, info.query_start);
std::string ref_left{projected_ref.substr(0, info.ref_start)};
std::reverse(query_left.begin(), query_left.end());
std::reverse(ref_left.begin(), ref_left.end());
auto left = aligner.ksw_extend(query_left, ref_left, true);

info.query_start -= left.query_end;
info.ref_start -= left.ref_end;
info.edit_distance += left.edit_distance;
info.sw_score += left.sw_score;

// TODO a bit too complicated
info.cigar.reverse();
info.cigar.pop_oplen();
info.cigar += left.cigar;
info.cigar.reverse();
}

assert(info.cigar.derived_sequence_length() == query.length());
result_ref_start = projected_ref_start + info.ref_start;
gapped = false;
}
if (gapped) {
const int diff = std::abs(nam.ref_span() - nam.query_span());
Expand Down
4 changes: 4 additions & 0 deletions src/cigar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ class Cigar {
std::reverse(m_ops.begin(), m_ops.end());
}

void pop_oplen() {
m_ops.pop_back();
}

/* Return a new Cigar that uses M instead of =/X */
Cigar to_m() const;

Expand Down

0 comments on commit 82611d8

Please sign in to comment.