Skip to content

Commit

Permalink
Replace seqan2::StringSet and String with std::vector in bestExtension
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Aug 30, 2024
1 parent 30e185c commit 81d38eb
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 30 deletions.
13 changes: 7 additions & 6 deletions include/dream_stellar/extension/align_banded_nw_best_ends.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,30 @@ using namespace seqan2;
///////////////////////////////////////////////////////////////////////////////
// Computes the banded alignment matrix and additionally a string with the best
// alignment end point for each alignment length.
template <typename TTrace, typename TEnd, typename TStringSet, typename TScore, typename TDiagonal>
template <typename TTrace, typename TEnd, typename TSegmentVector, typename TScore, typename TDiagonal>
inline void
_align_banded_nw_best_ends(TTrace& trace,
std::vector<TEnd> & bestEnds,
TStringSet const & str,
TSegmentVector const & str,
TScore const & sc,
TDiagonal const diagL,
TDiagonal const diagU)
{
typedef typename Value<TTrace>::Type TTraceValue;
typedef typename Value<TScore>::Type TScoreValue;
typedef typename Value<TStringSet>::Type TString;
typedef typename Value<TSegmentVector>::Type TSegment; // was Segment<String<TAlphabet>> now Segment<std::vector<TAlphabet>>
typedef typename Size<TTrace>::Type TSize;
using TAlphabet = typename Value<TString>::Type;
using TAlphabet = typename Value<TSegment>::Type;
//!TODO: TAlphabet should NOT be of container type

SEQAN_ASSERT_GEQ(diagU, diagL);

// Initialization
TTraceValue const Diagonal = 0;
TTraceValue const Horizontal = 1;
TTraceValue const Vertical = 2;
TString const& str1 = str[0];
TString const& str2 = str[1];
TSegment const& str1 = str[0];
TSegment const& str2 = str[1];
TSize const len1 = length(str1) + 1;
TSize const len2 = length(str2) + 1;
TSize const diagonalWidth = (TSize) (diagU - diagL + 1);
Expand Down
63 changes: 39 additions & 24 deletions include/dream_stellar/stellar_extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ template<typename TMatrix, typename TPossEnd, typename TAlphabet, typename TDiag
void
_fillMatrixBestEndsLeft(TMatrix & matrixLeft,
std::vector<TPossEnd> & possibleEndsLeft,
StringSet<Segment<std::span<TAlphabet> const, InfixSegment>> const & sequencesLeft,
//!TODO: should sequencesLeft be owning?
std::vector<Segment<std::vector<TAlphabet> const, InfixSegment>> const & sequencesLeft,
TDiagonal const diagLower,
TDiagonal const diagUpper,
TScore const & scoreMatrix) {
Expand Down Expand Up @@ -178,7 +179,7 @@ template<typename TMatrix, typename TPossEnd, typename TAlphabet, typename TDiag
void
_fillMatrixBestEndsRight(TMatrix & matrixRight,
std::vector<TPossEnd> & possibleEndsRight,
StringSet<Segment<std::span<TAlphabet> const, InfixSegment>> const & sequencesRight,
std::vector<Segment<std::vector<TAlphabet> const, InfixSegment>> const & sequencesRight,
TDiagonal const diagLower,
TDiagonal const diagUpper,
TScore const & scoreMatrix) {
Expand Down Expand Up @@ -414,8 +415,8 @@ _tracebackRight(TMatrix const & matrixRight,
template<typename TSequence, typename TSeed, typename TPos, typename TDir, typename TScore,
typename TSize, typename TEps, typename TAlign>
bool
_bestExtension(Segment<TSequence const, InfixSegment> const & infH,
Segment<TSequence const, InfixSegment> const & infV,
_bestExtension(Segment<TSequence const, InfixSegment> const & infH, // database
Segment<TSequence const, InfixSegment> const & infV, // query
TSeed const & seed,
TSeed const & seedOld,
TPos const alignLen,
Expand All @@ -427,6 +428,10 @@ _bestExtension(Segment<TSequence const, InfixSegment> const & infH,
TAlign & align,
stellar_best_extension_time & best_extension_runtime)
{
//!NOTE: TSequence = std::span<TAlphabet>
using TAlphabet = std::remove_cv<typename TSequence::value_type>::type;
using TOwningContainer = std::vector<TAlphabet>;

typedef std::vector<TraceBack> TAlignmentMatrix;
typedef ExtensionEndPosition<TPos> TEndInfo;
typedef typename std::vector<TEndInfo>::iterator TEndIterator;
Expand All @@ -448,10 +453,8 @@ _bestExtension(Segment<TSequence const, InfixSegment> const & infH,
assert(endPositionH(seedOld) <= endPositionH(seed)); // infixRightH
assert(endPositionV(seedOld) <= endPositionV(seed)); // infixRightV

TSequence sequenceCopyLeftH;
TSequence sequenceCopyLeftV;
StringSet<Segment<TSequence const, InfixSegment>> sequencesLeft;
StringSet<Segment<TSequence const, InfixSegment>> sequencesRight;
std::vector<Segment<TOwningContainer const, InfixSegment>> sequencesLeft;
std::vector<Segment<TOwningContainer const, InfixSegment>> sequencesRight;

// Compute diagonals for updated seeds module with infixH/first alignment row being in the horizontal direction.
TDiagonal const diagLowerLeft = lowerDiagonal(seedOld) - upperDiagonal(seed);
Expand All @@ -465,23 +468,32 @@ _bestExtension(Segment<TSequence const, InfixSegment> const & infH,
{
// fill banded matrix and gaps string for ...
if (direction == EXTEND_BOTH || direction == EXTEND_LEFT) { // ... extension to the left
best_extension_runtime.banded_needleman_wunsch_left_time.measure_time([&]()
{
// prepare copy segment...
reserve(sequenceCopyLeftH, beginPositionH(seedOld) - beginPositionH(seed));
reserve(sequenceCopyLeftV, beginPositionV(seedOld) - beginPositionV(seed));
//!TODO: can these be references instead of copies?
TOwningContainer segmentCopyLeftH;
TOwningContainer segmentCopyLeftV;
segmentCopyLeftH.reserve(beginPositionH(seedOld) - beginPositionH(seed));
segmentCopyLeftV.reserve(beginPositionV(seedOld) - beginPositionV(seed));

// ...copy segment...
append(sequenceCopyLeftH, infix(host(infH), beginPositionH(seed), beginPositionH(seedOld)));
append(sequenceCopyLeftV, infix(host(infV), beginPositionV(seed), beginPositionV(seedOld)));
best_extension_runtime.banded_needleman_wunsch_left_time.measure_time([&]()
{
// ...copy reverse complement segment
//!TODO: are seqan2 and seqan3 ranks compatible?
for (auto n : host(infH).subspan(beginPositionH(seed), beginPositionH(seedOld)) | std::views::reverse | seqan3::views::complement)
{
TAlphabet ad_n;
segmentCopyLeftH.push_back(seqan3::custom::assign_rank_to(seqan3::to_rank(n), ad_n));
}

// ...and reverse local copy
reverse(sequenceCopyLeftH);
reverse(sequenceCopyLeftV);
for (TAlphabet n : host(infV).subspan(beginPositionV(seed), beginPositionV(seedOld)) | std::views::reverse | seqan3::views::complement)
{
TAlphabet ad_n;
segmentCopyLeftV.push_back(seqan3::custom::assign_rank_to(seqan3::to_rank(n), ad_n));
}

// put infix segments
appendValue(sequencesLeft, infix(sequenceCopyLeftH, 0, length(sequenceCopyLeftH)));
appendValue(sequencesLeft, infix(sequenceCopyLeftV, 0, length(sequenceCopyLeftV)));
sequencesLeft.emplace_back(infix(segmentCopyLeftH, 0, segmentCopyLeftH.size()));
sequencesLeft.emplace_back(infix(segmentCopyLeftV, 0, segmentCopyLeftV.size()));

_fillMatrixBestEndsLeft(matrixLeft, possibleEndsLeft, sequencesLeft, diagLowerLeft, diagUpperLeft, scoreMatrix);
SEQAN_ASSERT_NOT(possibleEndsLeft.empty());
Expand All @@ -492,9 +504,13 @@ _bestExtension(Segment<TSequence const, InfixSegment> const & infH,
if (direction == EXTEND_BOTH || direction == EXTEND_RIGHT) { // ... extension to the right
best_extension_runtime.banded_needleman_wunsch_right_time.measure_time([&]()
{
appendValue(sequencesRight, infix(host(infH), endPositionH(seedOld), endPositionH(seed)));
appendValue(sequencesRight, infix(host(infV), endPositionV(seedOld), endPositionV(seed)));
// prepare copy segment...
//!TODO: can these be references instead of copies?
auto segmentCopyRightH = TOwningContainer(host(infH).begin() + endPositionH(seedOld), host(infH).begin() + endPositionH(seed));
auto segmentCopyRightV = TOwningContainer(host(infV).begin() + endPositionV(seedOld), host(infV).begin() + endPositionV(seed));

sequencesRight.emplace_back(infix(segmentCopyRightH, 0, segmentCopyRightH.size()));
sequencesRight.emplace_back(infix(segmentCopyRightV, 0, segmentCopyRightV.size()));
_fillMatrixBestEndsRight(matrixRight, possibleEndsRight, sequencesRight, diagLowerRight, diagUpperRight, scoreMatrix);
SEQAN_ASSERT_NOT(possibleEndsRight.empty());
}); // measure_time
Expand Down Expand Up @@ -623,7 +639,7 @@ _extendAndExtract(Align<Segment<Segment<TSequence const, InfixSegment>, InfixSeg
typedef typename Position<TSequence>::Type TPos;
typedef Seed<Simple> TSeed;

//!TODO: what are infH and infV segments?
//!NOTE: TSequence = std::span<TAlphabet>

// std::cerr << "LOCAL ALIGN\n" << row(localAlign, 0) << "\n" << row(localAlign, 1) << "\n";
// std::cerr << "ALIGN\n" << row(align, 0) << "\n" << row(align, 1) << "\n";
Expand Down Expand Up @@ -660,7 +676,6 @@ _extendAndExtract(Align<Segment<Segment<TSequence const, InfixSegment>, InfixSeg
Segment<TSequence const, InfixSegment> infixSequenceV = host(infV); // inner nested Segment
extension_runtime.extend_seed_time.measure_time([&]()
{
//!TODO: The seed extension should take place in the complete sequence not the segment
extendSeed(seed, infixSequenceH, infixSequenceV, direction, scoreMatrix, scoreDropOff, GappedXDrop());
});
if (static_cast<int64_t>(seedSize(seed)) < minLength - (int)floor(minLength*eps))
Expand Down
36 changes: 36 additions & 0 deletions include/utilities/alphabet_wrapper/std/span.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#pragma once

#include <span>
#include <ranges>
#include <algorithm>

namespace std
{

// https://brevzin.github.io/c++/2020/03/30/span-comparisons/
template <typename R, typename T>
concept const_contiguous_range_of =
ranges::contiguous_range<R const> &&
same_as<
remove_cvref_t<T>,
ranges::range_value_t<R const>>;

namespace std {
template <equality_comparable T, size_t E,
const_contiguous_range_of<T> R>
bool operator==(span<T, E> lhs, R const& rhs)
{
return ranges::equal(lhs, rhs);
}

template <three_way_comparable T, size_t E,
const_contiguous_range_of<T> R>
auto operator<=>(span<T, E> lhs, R const& rhs)
{
return lexicographical_compare_three_way(
lhs.begin(), lhs.end(),
ranges::begin(rhs), ranges::end(rhs));
}
}

} // namespace std
1 change: 1 addition & 0 deletions include/valik/search/search_local.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <utilities/threshold/search_kmer_profile.hpp>
#include <utilities/threshold/filtering_request.hpp>
#include <utilities/alphabet_wrapper/matcher/stellar_matcher.hpp>
#include <utilities/alphabet_wrapper/std/span.hpp>

#include <dream_stellar/diagnostics/print.tpp>
#include <dream_stellar/io/import_sequence.hpp>
Expand Down

0 comments on commit 81d38eb

Please sign in to comment.