From cb4abb956ab336be81c4c51386e50449866cc01f Mon Sep 17 00:00:00 2001 From: Evelin Aasna Date: Wed, 8 May 2024 18:29:56 +0200 Subject: [PATCH] Segment FPR function --- include/utilities/threshold/filtering_request.hpp | 3 +-- include/utilities/threshold/find.hpp | 9 +++++++++ src/threshold/find.cpp | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/utilities/threshold/filtering_request.hpp b/include/utilities/threshold/filtering_request.hpp index bbd015e1..d6f9d8f5 100755 --- a/include/utilities/threshold/filtering_request.hpp +++ b/include/utilities/threshold/filtering_request.hpp @@ -41,8 +41,7 @@ struct filtering_request { double pattern_p = ref_meta.pattern_spurious_match_prob(params); uint64_t patterns_per_segment = std::round((query_meta.total_len / (double) query_meta.seg_count - pattern.l + 1) / (double) query_every); - double none_match_p = pow(1 - pattern_p, patterns_per_segment); - return std::min(1 - none_match_p, 1.0); + return segment_fpr(pattern_p, patterns_per_segment); } }; diff --git a/include/utilities/threshold/find.hpp b/include/utilities/threshold/find.hpp index a84efae3..97115569 100755 --- a/include/utilities/threshold/find.hpp +++ b/include/utilities/threshold/find.hpp @@ -8,6 +8,15 @@ namespace valik { +/** + * @brief The false positive probability of a query segment that contains partially overlapping patterns: 1 - (1 - pattern_p)^patterns_per_segment, i.e. the chance that at least one of patterns_per_segment patterns matches spuriously when each does so with probability pattern_p (the formula treats the patterns as independent), capped at 1. +*/ +inline double segment_fpr(double const pattern_p, size_t const patterns_per_segment) +{ + double none_match_p = pow(1 - pattern_p, patterns_per_segment); + return std::min(1 - none_match_p, 1.0); +} + /** * @brief Score of the objective function for a parameter set. Smaller values are better. 
*/ diff --git a/src/threshold/find.cpp b/src/threshold/find.cpp index 82c25cdb..a1aa568e 100755 --- a/src/threshold/find.cpp +++ b/src/threshold/find.cpp @@ -111,7 +111,7 @@ search_kmer_profile find_thresholds_for_kmer_size(metadata const & ref_meta, auto best_params = param_set(attr.k, kmer_lemma_threshold(pattern.l, attr.k, errors), space); if ((best_params.t < THRESH_LOWER) || - (1 - pow(1 - ref_meta.pattern_spurious_match_prob(best_params), PATTERNS_PER_SEGMENT)) > FPR_UPPER) + segment_fpr(ref_meta.pattern_spurious_match_prob(best_params), PATTERNS_PER_SEGMENT) > FPR_UPPER) { search_type = search_kind::HEURISTIC; double best_score = pattern.l;