diff --git a/src/aln.cpp b/src/aln.cpp index 71871dfa..1da19074 100644 --- a/src/aln.cpp +++ b/src/aln.cpp @@ -221,15 +221,48 @@ inline Alignment extend_seed( AlignmentInfo info; int result_ref_start; bool gapped = true; - if (projected_ref_end - projected_ref_start == query.size() && consistent_nam) { - std::string_view ref_segm_ham = ref.substr(projected_ref_start, query.size()); - auto hamming_dist = hamming_distance(query, ref_segm_ham); - - if (hamming_dist >= 0 && (((float) hamming_dist / query.size()) < 0.05) ) { //Hamming distance worked fine, no need to ksw align - info = hamming_align(query, ref_segm_ham, aligner.parameters.match, aligner.parameters.mismatch, aligner.parameters.end_bonus); - result_ref_start = projected_ref_start + info.ref_start; - gapped = false; + if (consistent_nam && projected_ref_end - projected_ref_start == query.size()) { + const std::string_view projected_ref = ref.substr(projected_ref_start, query.size()); + info = hamming_align(query, projected_ref, aligner.parameters.match, aligner.parameters.mismatch, aligner.parameters.end_bonus); + if (info.query_end < query.length()) { + // Right end is soft clipped, do gapped alignment on it + const std::string query_right = query.substr(info.query_end); + const std::string_view ref_right = projected_ref.substr(info.ref_end); + auto right = aligner.ksw_extend(query_right, ref_right, false); + + info.query_end += right.query_end; + info.ref_end += right.ref_end; + info.edit_distance += right.edit_distance; + info.sw_score += right.sw_score; + assert(!info.cigar.empty()); + info.cigar.pop_oplen(); + + info.cigar += right.cigar; } + + if (info.query_start > 0) { + // Left end is soft clipped, do gapped alignment on it + std::string query_left = query.substr(0, info.query_start); + std::string ref_left{projected_ref.substr(0, info.ref_start)}; + std::reverse(query_left.begin(), query_left.end()); + std::reverse(ref_left.begin(), ref_left.end()); + auto left = aligner.ksw_extend(query_left, ref_left, true); + + info.query_start -= left.query_end; + info.ref_start -= left.ref_end; + info.edit_distance += left.edit_distance; + info.sw_score += left.sw_score; + + // TODO a bit too complicated + info.cigar.reverse(); + info.cigar.pop_oplen(); + info.cigar += left.cigar; + info.cigar.reverse(); + } + + assert(info.cigar.derived_sequence_length() == query.length()); + result_ref_start = projected_ref_start + info.ref_start; + gapped = false; } if (gapped) { const int diff = std::abs(nam.ref_span() - nam.query_span()); diff --git a/src/cigar.hpp b/src/cigar.hpp index 931ab8a1..55bda51d 100644 --- a/src/cigar.hpp +++ b/src/cigar.hpp @@ -93,6 +93,10 @@ class Cigar { std::reverse(m_ops.begin(), m_ops.end()); } + void pop_oplen() { + m_ops.pop_back(); + } + /* Return a new Cigar that uses M instead of =/X */ Cigar to_m() const;