From d1a4a15f876ad97c41b75c94e4e32e274e8c3f06 Mon Sep 17 00:00:00 2001 From: Evelin Date: Wed, 11 Sep 2024 15:56:15 +0200 Subject: [PATCH] Remove faulty global alignment --- .../options/verifier_options.hpp | 14 +--- include/dream_stellar/stellar.hpp | 2 - include/dream_stellar/stellar_launcher.hpp | 4 - .../verification/banded_global.hpp | 78 ------------------- .../verification/banded_global_extend.hpp | 68 ---------------- src/argument_parsing/search.cpp | 4 +- 6 files changed, 5 insertions(+), 165 deletions(-) delete mode 100644 include/dream_stellar/verification/banded_global.hpp delete mode 100644 include/dream_stellar/verification/banded_global_extend.hpp diff --git a/include/dream_stellar/options/verifier_options.hpp b/include/dream_stellar/options/verifier_options.hpp index 31151581..a3539d22 100644 --- a/include/dream_stellar/options/verifier_options.hpp +++ b/include/dream_stellar/options/verifier_options.hpp @@ -13,19 +13,11 @@ using AllLocal = seqan2::Tag const; struct VerifyBestLocal_; using BestLocal = seqan2::Tag const; -struct VerifyBandedGlobal_; -using BandedGlobal = seqan2::Tag const; - -struct VerifyBandedGlobalExtend_; -using BandedGlobalExtend = seqan2::Tag const; - -// basically a std::variant +// basically a std::variant struct StellarVerificationMethod { StellarVerificationMethod(AllLocal) : _index{0} {} StellarVerificationMethod(BestLocal) : _index{1} {} - StellarVerificationMethod(BandedGlobal) : _index{2} {} - StellarVerificationMethod(BandedGlobalExtend) : _index{3} {} constexpr std::size_t index() const noexcept { @@ -42,7 +34,7 @@ struct StellarVerificationMethod friend inline std::string to_string(StellarVerificationMethod method) { using cstring_t = char const * const; - cstring_t method_names[] = {"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}; + cstring_t method_names[] = {"exact", "bestLocal"}; return method_names[method.index()]; } @@ -54,7 +46,7 @@ struct VerifierOptions { double xDrop{5}; // maximal x-drop - // verification strategy: exact, bestLocal, bandedGlobal + // verification strategy: exact, bestLocal std::string strVerificationMethod{"exact"}; StellarVerificationMethod verificationMethod{AllLocal{}}; }; diff --git a/include/dream_stellar/stellar.hpp b/include/dream_stellar/stellar.hpp index 8894299b..35767198 100755 --- a/include/dream_stellar/stellar.hpp +++ b/include/dream_stellar/stellar.hpp @@ -35,8 +35,6 @@ #include #include #include -#include -#include #include #include diff --git a/include/dream_stellar/stellar_launcher.hpp b/include/dream_stellar/stellar_launcher.hpp index 7f37e07b..4a59fe32 100644 --- a/include/dream_stellar/stellar_launcher.hpp +++ b/include/dream_stellar/stellar_launcher.hpp @@ -46,10 +46,6 @@ struct StellarLauncher return visitor_fn(AllLocal()); else if (verificationMethod == StellarVerificationMethod{BestLocal{}}) return visitor_fn(BestLocal()); - else if (verificationMethod == StellarVerificationMethod{BandedGlobal{}}) - return visitor_fn(BandedGlobal()); - else if (verificationMethod == StellarVerificationMethod{BandedGlobalExtend{}}) - return visitor_fn(BandedGlobalExtend()); return StellarComputeStatistics{}; } diff --git a/include/dream_stellar/verification/banded_global.hpp b/include/dream_stellar/verification/banded_global.hpp deleted file mode 100644 index 45ea5954..00000000 --- a/include/dream_stellar/verification/banded_global.hpp +++ /dev/null @@ -1,78 +0,0 @@ - -#pragma once - -#include -#include - -namespace dream_stellar -{ - -/////////////////////////////////////////////////////////////////////////////// -// Conducts banded alignment on swift hit and extracts longest contained eps-match. -template -void -verifySwiftHit(Segment, InfixSegment> const & infH, - Segment, InfixSegment> const & infV, - TEpsilon const eps, - TSize const minLength, - TDrop /*xDrop*/, - TDelta const delta, - TOnAlignmentResultFn && onAlignmentResult, - stellar_verification_time & verification_runtime, - BandedGlobal) { - using TInfix = Segment; - typedef Segment TSegment; - typedef typename StellarMatch::TAlign TAlign; - - // define a scoring scheme - typedef int TScore; - TScore match = 1; - // large negative scoring scheme values lead to excessive seed extension - TScore scoringSchemeLowerBound = -(TScore)1000; - TScore mismatchIndel = scoringSchemeLowerBound; - if (eps > -1/scoringSchemeLowerBound) // avoid division by 0 - mismatchIndel = (TScore)_max((TScore) std::ceil(-1/eps) + 1, -(TScore)length(host(infH))); - - Score scoreMatrix(match, mismatchIndel, mismatchIndel); - - // diagonals for banded alignment - int64_t upperDiag = 0; - int64_t lowerDiag = endPosition(infH) - (int64_t)endPosition(infV) - beginPosition(infH) + beginPosition(infV); - if (beginPosition(infV) == 0) upperDiag = lowerDiag + delta; - if (endPosition(infV) == endPosition(host(infV))) lowerDiag = -(int64_t)delta; - - // banded alignment on parallelogram - Align bandedAlign; - resize(rows(bandedAlign), 2); - assignSource(row(bandedAlign, 0), infH); - assignSource(row(bandedAlign, 1), infV); - globalAlignment(bandedAlign, scoreMatrix, lowerDiag, upperDiag, NeedlemanWunsch()); - - longestEpsMatch(bandedAlign, minLength, eps); - - // integrate alignment in object of type TAlign - TAlign align; - resize(rows(align), 2); - setSource(row(align, 0), host(host(infH))); - setSource(row(align, 1), host(host(infV))); - integrateAlign(align, bandedAlign); - - // TODO(holtgrew): The following has not been adapted to the new clipping interface yet! - // set begin and end positions of align - SEQAN_FAIL("TODO(bkehr): Adapt to new clipping interface!"); - setClippedBeginPosition(row(align, 0), beginPosition(infH) + clippedBeginPosition(row(bandedAlign, 0))); - setClippedBeginPosition(row(align, 1), beginPosition(infV) + beginPosition(host(infV)) + clippedBeginPosition(row(bandedAlign, 1))); - setBeginPosition(row(align, 0), 0); - setBeginPosition(row(align, 1), 0); - setClippedEndPosition(row(align, 0), beginPosition(infH) + clippedEndPosition(row(bandedAlign, 0))); - setClippedEndPosition(row(align, 1), beginPosition(infV) + beginPosition(host(infV)) + clippedEndPosition(row(bandedAlign, 1))); - - if ((TSize)length(row(align, 0)) < minLength) - return; - - // insert eps-match in matches string - onAlignmentResult(align); -} - -} // namespace dream_stellar diff --git a/include/dream_stellar/verification/banded_global_extend.hpp b/include/dream_stellar/verification/banded_global_extend.hpp deleted file mode 100644 index 7d35bd06..00000000 --- a/include/dream_stellar/verification/banded_global_extend.hpp +++ /dev/null @@ -1,68 +0,0 @@ - -#pragma once - -#include -#include -#include - -namespace dream_stellar -{ - -/////////////////////////////////////////////////////////////////////////////// -// Conducts banded alignment on swift hit, extends alignment, and extracts longest contained eps-match. -template -void -verifySwiftHit(Segment, InfixSegment> const & infH, - Segment, InfixSegment> const & infV, - TEpsilon const eps, - TSize const minLength, - TDrop const xDrop, - TDelta const delta, - TOnAlignmentResultFn && onAlignmentResult, - stellar_verification_time & verification_runtime, - BandedGlobalExtend) { - using TInfix = Segment; - typedef Segment TSegment; - typedef typename StellarMatch::TAlign TAlign; - - // define a scoring scheme - typedef int TScore; - TScore match = 1; - // large negative scoring scheme values lead to excessive seed extension - TScore scoringSchemeLowerBound = -(TScore)1000; - TScore mismatchIndel = scoringSchemeLowerBound; - if (eps > -1/scoringSchemeLowerBound) // avoid division by 0 - mismatchIndel = (TScore)_max((TScore) std::ceil(-1/eps) + 1, -(TScore)length(host(infH))); - Score scoreMatrix(match, mismatchIndel, mismatchIndel); - TScore scoreDropOff = (TScore) _max((TScore) xDrop * (-mismatchIndel), MinValue::VALUE + 1); - - // diagonals for banded alignment - int64_t upperDiag = 0; - int64_t lowerDiag = endPosition(infH) - (int64_t)endPosition(infV) - beginPosition(infH) + beginPosition(infV); - if (beginPosition(infV) == 0) upperDiag = lowerDiag + delta; - if (endPosition(infV) == endPosition(host(infV))) lowerDiag = -(int64_t)delta; - - // banded alignment on parallelogram - Align bandedAlign; - resize(rows(bandedAlign), 2); - assignSource(row(bandedAlign, 0), infH); - assignSource(row(bandedAlign, 1), infV); - globalAlignment(bandedAlign, scoreMatrix, lowerDiag, upperDiag, NeedlemanWunsch()); - - // create alignment object for the complete sequences - TAlign align; - resize(rows(align), 2); - setSource(row(align, 0), host(host(infH))); - setSource(row(align, 1), host(host(infV))); - - // extend alignment and obtain longest contained eps-match - // TODO: something is wrong here, e.g. extract around seed, but also something else - if (!_extendAndExtract(bandedAlign, scoreDropOff, scoreMatrix, infH, infV, EXTEND_BOTH, minLength, eps, align, verification_runtime.extension_time)) - return; - - // insert eps-match in matches string - onAlignmentResult(align); -} - -} // namespace dream_stellar diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp index e8da06c3..131d0492 100644 --- a/src/argument_parsing/search.cpp +++ b/src/argument_parsing/search.cpp @@ -172,9 +172,9 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments) parser.add_option(arguments.strVerificationMethod, sharg::config{.short_id = '\0', .long_id = "verification", - .description = "STELLAR: Verification strategy: exact or bestLocal or bandedGlobal.", + .description = "STELLAR: Verification strategy: exact or bestLocal.", .advanced = true, - .validator = sharg::value_list_validator{"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}}); + .validator = sharg::value_list_validator{"exact", "bestLocal"}}); parser.add_option(arguments.numMatches, sharg::config{.short_id = '\0', .long_id = "numMatches",