Rename threshold::mean_number_of_minimizers() to minimizer_count_repeat_cutoff()
and lower the cutoff from half to a quarter of the minimizer count range.

  * raptor/threshold: minimizer_count_repeat_cutoff() returns
    round((maximal_number_of_minimizers - minimal_number_of_minimizers) / 4.0);
    get_min_minimizer_count() / get_max_minimizer_count() expose the two bounds.
  * valik/search/local_prefilter.hpp: make_pattern_bounds() compares the
    minimiser count against the renamed cutoff; the else branch gets braces.
  * valik/search/search_local.hpp: both bounds and the cutoff are written to
    seqan3::debug_stream right after the thresholder is constructed, each as
    "label<TAB>value".

Review notes:
  * The cutoff is the width of the range divided by four and is not offset by
    the minimal count; confirm that this is the intended reference point.
  * Line breaks and indentation were restored from a whitespace-collapsed copy
    of this patch, so context whitespace may differ from the target files; use
    `git apply --ignore-whitespace` if a hunk does not match.
  * The text after each "@@ ... @@" marker is informational and kept as found.
  * The "index" lines keep the original blob ids although added lines changed.

diff --git a/include/raptor/threshold/threshold.hpp b/include/raptor/threshold/threshold.hpp
index 658ceec6..bacd6949 100644
--- a/include/raptor/threshold/threshold.hpp
+++ b/include/raptor/threshold/threshold.hpp
@@ -32,7 +32,7 @@ class threshold
 
     size_t get(size_t const minimiser_count) const noexcept;
 
-    size_t mean_number_of_minimizers() const noexcept;
+    size_t minimizer_count_repeat_cutoff() const noexcept;
 
 private:
     enum class threshold_kinds
@@ -49,6 +49,19 @@ class threshold
     size_t minimal_number_of_minimizers{};
     size_t maximal_number_of_minimizers{};
     double threshold_percentage{};
+
+public:
+    /** Returns the stored minimal number of minimizers. */
+    size_t get_min_minimizer_count() const noexcept
+    {
+        return minimal_number_of_minimizers;
+    }
+
+    /** Returns the stored maximal number of minimizers. */
+    size_t get_max_minimizer_count() const noexcept
+    {
+        return maximal_number_of_minimizers;
+    }
 };
 
 } // namespace raptor::threshold
diff --git a/include/valik/search/local_prefilter.hpp b/include/valik/search/local_prefilter.hpp
index fefbf8a0..4c098ab1 100644
--- a/include/valik/search/local_prefilter.hpp
+++ b/include/valik/search/local_prefilter.hpp
@@ -93,14 +93,16 @@ std::optional make_pattern_bounds(size_t const & begin,
 
     if (arguments.keep_all_repeats ||
         (arguments.keep_best_repeats &&
-        (minimiser_count >= (thresholder.mean_number_of_minimizers()))))
+        (minimiser_count >= (thresholder.minimizer_count_repeat_cutoff()))))
     // ignore low entropy repeat patterns
     {
         pattern.threshold = thresholder.get(minimiser_count);
         return pattern;
     }
     else
+    {
         return std::nullopt;
+    }
 }
 
 /**
diff --git a/include/valik/search/search_local.hpp b/include/valik/search/search_local.hpp
index 1c4d9409..d5191a10 100644
--- a/include/valik/search/search_local.hpp
+++ b/include/valik/search/search_local.hpp
@@ -398,6 +398,10 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st
     {
         using ibf_t = decltype(index.ibf());
         raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
+        // Report the thresholder's minimizer count bounds and the repeat cutoff as "label<TAB>value".
+        seqan3::debug_stream << "min_min\t" << thresholder.get_min_minimizer_count() << '\n';
+        seqan3::debug_stream << "max_min\t" << thresholder.get_max_minimizer_count() << '\n';
+        seqan3::debug_stream << "minimizer_count_repeat_cutoff\t" << thresholder.minimizer_count_repeat_cutoff() << '\n';
         if constexpr (is_split)
         {
             iterate_split_queries(arguments, index.ibf(), thresholder, queue, query_meta.value());
diff --git a/src/raptor/threshold/threshold.cpp b/src/raptor/threshold/threshold.cpp
index 5d669280..d8ca5c23 100644
--- a/src/raptor/threshold/threshold.cpp
+++ b/src/raptor/threshold/threshold.cpp
@@ -61,9 +61,11 @@ size_t threshold::get(size_t const minimiser_count) const noexcept
     }
 }
 
-size_t threshold::mean_number_of_minimizers() const noexcept
-{
-    return (size_t) std::round((maximal_number_of_minimizers - minimal_number_of_minimizers) / 2.0);
+// Returns a quarter of the (maximal - minimal) minimizer count range, rounded to the nearest integer.
+// make_pattern_bounds() compares a pattern's minimiser count against this value when keep_best_repeats is set.
+size_t threshold::minimizer_count_repeat_cutoff() const noexcept
+{
+    return static_cast<size_t>(std::round((maximal_number_of_minimizers - minimal_number_of_minimizers) / 4.0));
 }
 
 } // namespace raptor::threshold