From 51421de3b5845d265d74946ecfca23f9b6dd6489 Mon Sep 17 00:00:00 2001 From: Evelin Aasna Date: Mon, 29 Jan 2024 11:16:36 +0100 Subject: [PATCH] Revert back to raptor::threshold percentage --- include/utilities/threshold/shared.hpp | 15 ------------ include/valik/search/iterate_queries.hpp | 12 +++++----- .../search/prefilter_queries_parallel.hpp | 4 ++-- include/valik/search/search_distributed.hpp | 12 ++-------- include/valik/search/search_local.hpp | 24 ++++--------------- include/valik/shared.hpp | 8 +++---- src/argument_parsing/search.cpp | 18 +++++++------- src/threshold/find.cpp | 3 ++- src/valik_split.cpp | 4 ++-- 9 files changed, 31 insertions(+), 69 deletions(-) diff --git a/include/utilities/threshold/shared.hpp b/include/utilities/threshold/shared.hpp index f40d9304..57e32bbd 100755 --- a/include/utilities/threshold/shared.hpp +++ b/include/utilities/threshold/shared.hpp @@ -70,21 +70,6 @@ struct param_space constexpr static std::pair kmer_range{9, 21}; }; -namespace threshold -{ - -struct threshold -{ - size_t value; - - size_t get(size_t dummy) const - { - return value; - } -}; - -} // namespace valik::threshold - /** * @brief Total number of error configurations. Same as the number of combinations of len take error_count. * diff --git a/include/valik/search/iterate_queries.hpp b/include/valik/search/iterate_queries.hpp index 57ee654e..f4bc5ccf 100644 --- a/include/valik/search/iterate_queries.hpp +++ b/include/valik/search/iterate_queries.hpp @@ -20,10 +20,10 @@ namespace valik::app * @param thresholder Threshold for number of shared k-mers. * @param queue Shopping cart queue for load balancing between prefiltering and Stellar search. */ -template +template void iterate_distributed_queries(search_arguments const & arguments, ibf_t const & ibf, - thresh_t const & thresholder, + raptor::threshold::threshold const & thresholder, cart_queue_t & queue) { using fields = seqan3::fields; @@ -48,10 +48,10 @@ void iterate_distributed_queries(search_arguments const & arguments, * @param thresholder Threshold for number of shared k-mers. * @param queue Shopping cart queue for load balancing between Valik prefiltering and Stellar search. */ -template +template void iterate_short_queries(search_arguments const & arguments, ibf_t const & ibf, - thresh_t const & thresholder, + raptor::threshold::threshold const & thresholder, cart_queue>> & queue) { using TSequence = seqan2::String; @@ -110,10 +110,10 @@ void iterate_short_queries(search_arguments const & arguments, * @param queue Shopping cart queue for load balancing between Valik prefiltering and Stellar search. * @param meta Metadata table for split query segments. */ -template +template void iterate_split_queries(search_arguments const & arguments, ibf_t const & ibf, - thresh_t const & thresholder, + raptor::threshold::threshold const & thresholder, cart_queue>> & queue, metadata & meta) { diff --git a/include/valik/search/prefilter_queries_parallel.hpp b/include/valik/search/prefilter_queries_parallel.hpp index 32692e83..9ecd5318 100644 --- a/include/valik/search/prefilter_queries_parallel.hpp +++ b/include/valik/search/prefilter_queries_parallel.hpp @@ -21,11 +21,11 @@ namespace valik::app /** * @brief Create parallel prefiltering jobs. */ -template +template inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter const & ibf, search_arguments const & arguments, std::vector const & records, - thresh_t const & thresholder, + raptor::threshold::threshold const & thresholder, cart_queue & queue) { if (records.empty()) diff --git a/include/valik/search/search_distributed.hpp b/include/valik/search/search_distributed.hpp index 0ab4301d..44a1b18d 100644 --- a/include/valik/search/search_distributed.hpp +++ b/include/valik/search/search_distributed.hpp @@ -129,16 +129,8 @@ bool search_distributed(search_arguments const & arguments, search_time_statisti } auto start = std::chrono::high_resolution_clock::now(); - if (arguments.manual_threshold) - { - valik::threshold::threshold const thresholder(arguments.threshold); - iterate_distributed_queries(arguments, index.ibf(), thresholder, queue); - } - else - { - raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; - iterate_distributed_queries(arguments, index.ibf(), thresholder, queue); - } + raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; + iterate_distributed_queries(arguments, index.ibf(), thresholder, queue); queue.finish(); // Flush carts that are not empty yet consumerThreads.clear(); diff --git a/include/valik/search/search_local.hpp b/include/valik/search/search_local.hpp index f6db0f76..50ee1a80 100644 --- a/include/valik/search/search_local.hpp +++ b/include/valik/search/search_local.hpp @@ -323,29 +323,13 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t auto start = std::chrono::high_resolution_clock::now(); if constexpr (is_split) { - if (arguments.manual_threshold) - { - valik::threshold::threshold const thresholder(arguments.threshold); - iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta); - } - else - { - raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; - iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta); - } + raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; + iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta); } else { - if (arguments.manual_threshold) - { - valik::threshold::threshold const thresholder(arguments.threshold); - iterate_short_queries(arguments, index.ibf(), thresholder, queue); - } - else - { - raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; - iterate_short_queries(arguments, index.ibf(), thresholder, queue); - } + raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; + iterate_short_queries(arguments, index.ibf(), thresholder, queue); } queue.finish(); // Flush carts that are not empty yet diff --git a/include/valik/shared.hpp b/include/valik/shared.hpp index 585874eb..fb777079 100644 --- a/include/valik/shared.hpp +++ b/include/valik/shared.hpp @@ -96,7 +96,8 @@ struct minimiser_threshold_arguments double fpr{0.05}; uint8_t errors{0}; size_t pattern_size{}; - bool treshold_was_set{false}; + double threshold_percentage{std::numeric_limits::quiet_NaN()}; + bool threshold_was_set{false}; bool cache_thresholds{false}; protected: @@ -124,6 +125,7 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste uint8_t shape_size{shape.size()}; uint8_t shape_weight{shape.count()}; uint64_t overlap{}; + size_t threshold{}; uint8_t threads{1u}; @@ -149,7 +151,7 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste .shape{shape}, .query_length{pattern_size}, .errors{errors}, - .percentage{std::numeric_limits::quiet_NaN()}, + .percentage{threshold_percentage}, .p_max{p_max}, .fpr{fpr}, .tau{tau}, @@ -159,8 +161,6 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste } float error_rate{}; - size_t threshold{}; - bool manual_threshold{false}; std::filesystem::path ref_meta_path{}; std::filesystem::path query_meta_path{}; bool distribute{false}; diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp index 2ab0c6d8..c63d8635 100644 --- a/src/argument_parsing/search.cpp +++ b/src/argument_parsing/search.cpp @@ -192,7 +192,6 @@ void run_search(sharg::parser & parser) // ========================================== sharg::input_file_validator{}(arguments.query_file); - arguments.treshold_was_set = parser.is_option_set("threshold"); if (parser.is_option_set("disableThresh") && parser.is_option_set("numMatches")) { if (arguments.numMatches > arguments.disableThresh) @@ -217,14 +216,6 @@ void run_search(sharg::parser & parser) arguments.bin_path = tmp.bin_path(); } - // ========================================== - // Process --threshold. - // ========================================== - if (parser.is_option_set("threshold")) - { - arguments.manual_threshold = true; // otherwise use raptor::threshold - } - // ========================================== // Process --pattern. // ========================================== @@ -246,6 +237,15 @@ void run_search(sharg::parser & parser) } } + // ========================================== + // Process --threshold. + // ========================================== + if (parser.is_option_set("threshold")) + { + arguments.threshold_was_set = true; // use raptor::threshold_kinds::percentage + arguments.threshold_percentage = arguments.threshold / (double) (arguments.pattern_size - arguments.shape.size() + 1); + } + // ========================================== // Create temporary file path for merging distributed Stellar runs. // ========================================== diff --git a/src/threshold/find.cpp b/src/threshold/find.cpp index fa566d64..26fca48f 100755 --- a/src/threshold/find.cpp +++ b/src/threshold/find.cpp @@ -65,7 +65,8 @@ void get_best_params(param_space const & space, */ std::cout << best_params.k << '\t' << best_params.t << '\t' - << fn_rates[best_params.k - std::get<0>(space.kmer_range)][best_params.t - 1] << '\n'; + << fn_rates[best_params.k - std::get<0>(space.kmer_range)][best_params.t - 1] << '\t' + << fp_rates[best_params.k - std::get<0>(space.kmer_range)] << '\n'; } } // namespace valik diff --git a/src/valik_split.cpp b/src/valik_split.cpp index 38cdca53..289ff921 100644 --- a/src/valik_split.cpp +++ b/src/valik_split.cpp @@ -30,8 +30,8 @@ void valik_split(split_arguments & arguments) std::cout << "db length: " << meta.total_len << "bp\n"; std::cout << "min local match length: " << arguments.overlap << "bp\n"; - std::cout << "Recommended parameters for a chosen error rate:\n"; - std::cout << "max_error_rate\tkmer_size\tthreshold\tFNR\n"; + std::cout << "Recommended parameters depending on the chosen error rate\n\n"; + std::cout << "max_error_rate\tkmer_size\tthreshold\tFNR\tFP_per_bin\n"; for (size_t errors{1}; errors <= std::round(arguments.overlap * 0.1); errors++) { std::cout.precision(3);