diff --git a/include/dream_stellar/extension/align_banded_nw_best_ends.hpp b/include/dream_stellar/extension/align_banded_nw_best_ends.hpp index 11db3e10..54cbb9c9 100644 --- a/include/dream_stellar/extension/align_banded_nw_best_ends.hpp +++ b/include/dream_stellar/extension/align_banded_nw_best_ends.hpp @@ -9,20 +9,21 @@ using namespace seqan2; /////////////////////////////////////////////////////////////////////////////// // Computes the banded alignment matrix and additionally a string with the best // alignment end point for each alignment length. -template +template inline void _align_banded_nw_best_ends(TTrace& trace, std::vector & bestEnds, - TStringSet const & str, + TSegmentVector const & str, TScore const & sc, TDiagonal const diagL, TDiagonal const diagU) { typedef typename Value::Type TTraceValue; typedef typename Value::Type TScoreValue; - typedef typename Value::Type TString; + typedef typename Value::Type TSegment; // was Segment> now Segment> typedef typename Size::Type TSize; - using TAlphabet = typename Value::Type; + using TAlphabet = typename Value::Type; + //!TODO: TAlphabet should NOT be of container type SEQAN_ASSERT_GEQ(diagU, diagL); @@ -30,8 +31,8 @@ _align_banded_nw_best_ends(TTrace& trace, TTraceValue const Diagonal = 0; TTraceValue const Horizontal = 1; TTraceValue const Vertical = 2; - TString const& str1 = str[0]; - TString const& str2 = str[1]; + TSegment const& str1 = str[0]; + TSegment const& str2 = str[1]; TSize const len1 = length(str1) + 1; TSize const len2 = length(str2) + 1; TSize const diagonalWidth = (TSize) (diagU - diagL + 1); diff --git a/include/dream_stellar/stellar_extension.hpp b/include/dream_stellar/stellar_extension.hpp index 4ed6e200..17364103 100644 --- a/include/dream_stellar/stellar_extension.hpp +++ b/include/dream_stellar/stellar_extension.hpp @@ -148,7 +148,8 @@ template & possibleEndsLeft, - StringSet const, InfixSegment>> const & sequencesLeft, + //!TODO: should sequencesLeft be owning? + std::vector const, InfixSegment>> const & sequencesLeft, TDiagonal const diagLower, TDiagonal const diagUpper, TScore const & scoreMatrix) { @@ -178,7 +179,7 @@ template & possibleEndsRight, - StringSet const, InfixSegment>> const & sequencesRight, + std::vector const, InfixSegment>> const & sequencesRight, TDiagonal const diagLower, TDiagonal const diagUpper, TScore const & scoreMatrix) { @@ -414,8 +415,8 @@ _tracebackRight(TMatrix const & matrixRight, template bool -_bestExtension(Segment const & infH, - Segment const & infV, +_bestExtension(Segment const & infH, // database + Segment const & infV, // query TSeed const & seed, TSeed const & seedOld, TPos const alignLen, @@ -427,6 +428,10 @@ _bestExtension(Segment const & infH, TAlign & align, stellar_best_extension_time & best_extension_runtime) { + //!NOTE: TSequence = std::span + using TAlphabet = std::remove_cv::type; + using TOwningContainer = std::vector; + typedef std::vector TAlignmentMatrix; typedef ExtensionEndPosition TEndInfo; typedef typename std::vector::iterator TEndIterator; @@ -448,10 +453,8 @@ _bestExtension(Segment const & infH, assert(endPositionH(seedOld) <= endPositionH(seed)); // infixRightH assert(endPositionV(seedOld) <= endPositionV(seed)); // infixRightV - TSequence sequenceCopyLeftH; - TSequence sequenceCopyLeftV; - StringSet> sequencesLeft; - StringSet> sequencesRight; + std::vector> sequencesLeft; + std::vector> sequencesRight; // Compute diagonals for updated seeds module with infixH/first alignment row being in the horizontal direction. TDiagonal const diagLowerLeft = lowerDiagonal(seedOld) - upperDiagonal(seed); @@ -465,23 +468,32 @@ _bestExtension(Segment const & infH, { // fill banded matrix and gaps string for ... if (direction == EXTEND_BOTH || direction == EXTEND_LEFT) { // ... extension to the left - best_extension_runtime.banded_needleman_wunsch_left_time.measure_time([&]() - { // prepare copy segment... - reserve(sequenceCopyLeftH, beginPositionH(seedOld) - beginPositionH(seed)); - reserve(sequenceCopyLeftV, beginPositionV(seedOld) - beginPositionV(seed)); + //!TODO: can these be references instead of copies? + TOwningContainer segmentCopyLeftH; + TOwningContainer segmentCopyLeftV; + segmentCopyLeftH.reserve(beginPositionH(seedOld) - beginPositionH(seed)); + segmentCopyLeftV.reserve(beginPositionV(seedOld) - beginPositionV(seed)); - // ...copy segment... - append(sequenceCopyLeftH, infix(host(infH), beginPositionH(seed), beginPositionH(seedOld))); - append(sequenceCopyLeftV, infix(host(infV), beginPositionV(seed), beginPositionV(seedOld))); + best_extension_runtime.banded_needleman_wunsch_left_time.measure_time([&]() + { + // ...copy reverse complement segment + //!TODO: are seqan2 and seqan3 ranks compatible? + for (auto n : host(infH).subspan(beginPositionH(seed), beginPositionH(seedOld)) | std::views::reverse | seqan3::views::complement) + { + TAlphabet ad_n; + segmentCopyLeftH.push_back(seqan3::custom::assign_rank_to(seqan3::to_rank(n), ad_n)); + } - // ...and reverse local copy - reverse(sequenceCopyLeftH); - reverse(sequenceCopyLeftV); + for (TAlphabet n : host(infV).subspan(beginPositionV(seed), beginPositionV(seedOld)) | std::views::reverse | seqan3::views::complement) + { + TAlphabet ad_n; + segmentCopyLeftV.push_back(seqan3::custom::assign_rank_to(seqan3::to_rank(n), ad_n)); + } // put infix segments - appendValue(sequencesLeft, infix(sequenceCopyLeftH, 0, length(sequenceCopyLeftH))); - appendValue(sequencesLeft, infix(sequenceCopyLeftV, 0, length(sequenceCopyLeftV))); + sequencesLeft.emplace_back(infix(segmentCopyLeftH, 0, segmentCopyLeftH.size())); + sequencesLeft.emplace_back(infix(segmentCopyLeftV, 0, segmentCopyLeftV.size())); _fillMatrixBestEndsLeft(matrixLeft, possibleEndsLeft, sequencesLeft, diagLowerLeft, diagUpperLeft, scoreMatrix); SEQAN_ASSERT_NOT(possibleEndsLeft.empty()); @@ -492,9 +504,13 @@ _bestExtension(Segment const & infH, if (direction == EXTEND_BOTH || direction == EXTEND_RIGHT) { // ... extension to the right best_extension_runtime.banded_needleman_wunsch_right_time.measure_time([&]() { - appendValue(sequencesRight, infix(host(infH), endPositionH(seedOld), endPositionH(seed))); - appendValue(sequencesRight, infix(host(infV), endPositionV(seedOld), endPositionV(seed))); + // prepare copy segment... + //!TODO: can these be references instead of copies? + auto segmentCopyRightH = TOwningContainer(host(infH).begin() + endPositionH(seedOld), host(infH).begin() + endPositionH(seed)); + auto segmentCopyRightV = TOwningContainer(host(infV).begin() + endPositionV(seedOld), host(infV).begin() + endPositionV(seed)); + sequencesRight.emplace_back(infix(segmentCopyRightH, 0, segmentCopyRightH.size())); + sequencesRight.emplace_back(infix(segmentCopyRightV, 0, segmentCopyRightV.size())); _fillMatrixBestEndsRight(matrixRight, possibleEndsRight, sequencesRight, diagLowerRight, diagUpperRight, scoreMatrix); SEQAN_ASSERT_NOT(possibleEndsRight.empty()); }); // measure_time @@ -623,7 +639,7 @@ _extendAndExtract(Align, InfixSeg typedef typename Position::Type TPos; typedef Seed TSeed; - //!TODO: what are infH and infV segments? + //!NOTE: TSequence = std::span // std::cerr << "LOCAL ALIGN\n" << row(localAlign, 0) << "\n" << row(localAlign, 1) << "\n"; // std::cerr << "ALIGN\n" << row(align, 0) << "\n" << row(align, 1) << "\n"; @@ -660,7 +676,6 @@ _extendAndExtract(Align, InfixSeg Segment infixSequenceV = host(infV); // inner nested Segment extension_runtime.extend_seed_time.measure_time([&]() { - //!TODO: The seed extension should take place in the complete sequence not the segment extendSeed(seed, infixSequenceH, infixSequenceV, direction, scoreMatrix, scoreDropOff, GappedXDrop()); }); if (static_cast(seedSize(seed)) < minLength - (int)floor(minLength*eps)) diff --git a/include/utilities/alphabet_wrapper/std/span.hpp b/include/utilities/alphabet_wrapper/std/span.hpp new file mode 100644 index 00000000..54b4c416 --- /dev/null +++ b/include/utilities/alphabet_wrapper/std/span.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +namespace std +{ + +// https://brevzin.github.io/c++/2020/03/30/span-comparisons/ +template +concept const_contiguous_range_of = + ranges::contiguous_range && + same_as< + remove_cvref_t, + ranges::range_value_t>; + +namespace std { + template R> + bool operator==(span lhs, R const& rhs) + { + return ranges::equal(lhs, rhs); + } + + template R> + auto operator<=>(span lhs, R const& rhs) + { + return lexicographical_compare_three_way( + lhs.begin(), lhs.end(), + ranges::begin(rhs), ranges::end(rhs)); + } +} + +} // namespace std diff --git a/include/valik/search/search_local.hpp b/include/valik/search/search_local.hpp index 7151fa99..21c193c1 100644 --- a/include/valik/search/search_local.hpp +++ b/include/valik/search/search_local.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include