Skip to content

Commit

Permalink
Revert back to raptor::threshold percentage
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Jan 29, 2024
1 parent c354736 commit 51421de
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 69 deletions.
15 changes: 0 additions & 15 deletions include/utilities/threshold/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,6 @@ struct param_space
constexpr static std::pair<size_t, size_t> kmer_range{9, 21};
};

namespace threshold
{

/**
 * @brief Thresholder that always reports the same, manually chosen value.
 *
 * The type is deliberately kept an aggregate so that it can be created
 * directly from a single count, e.g. `threshold{n}`.
 */
struct threshold
{
    //!\brief The fixed threshold returned by get(); zero unless set explicitly.
    size_t value{};

    /**
     * @brief Return the stored threshold.
     *
     * The argument is accepted only so that callers can invoke `get(size_t)`;
     * it is never read, which is why the parameter is left unnamed
     * (this also avoids an unused-parameter warning).
     *
     * @return The value held in `value`.
     */
    constexpr size_t get(size_t /* unused */) const noexcept
    {
        return value;
    }
};

} // namespace valik::threshold

/**
* @brief Total number of error configurations, i.e. the binomial coefficient C(len, error_count): the number of ways to choose error_count positions out of len.
*
Expand Down
12 changes: 6 additions & 6 deletions include/valik/search/iterate_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ namespace valik::app
* @param thresholder Threshold for number of shared k-mers.
* @param queue Shopping cart queue for load balancing between prefiltering and Stellar search.
*/
template <typename ibf_t, typename cart_queue_t, typename thresh_t>
template <typename ibf_t, typename cart_queue_t>
void iterate_distributed_queries(search_arguments const & arguments,
ibf_t const & ibf,
thresh_t const & thresholder,
raptor::threshold::threshold const & thresholder,
cart_queue_t & queue)
{
using fields = seqan3::fields<seqan3::field::id, seqan3::field::seq>;
Expand All @@ -48,10 +48,10 @@ void iterate_distributed_queries(search_arguments const & arguments,
* @param thresholder Threshold for number of shared k-mers.
* @param queue Shopping cart queue for load balancing between Valik prefiltering and Stellar search.
*/
template <typename ibf_t, typename thresh_t>
template <typename ibf_t>
void iterate_short_queries(search_arguments const & arguments,
ibf_t const & ibf,
thresh_t const & thresholder,
raptor::threshold::threshold const & thresholder,
cart_queue<shared_query_record<seqan2::String<seqan2::Dna>>> & queue)
{
using TSequence = seqan2::String<seqan2::Dna>;
Expand Down Expand Up @@ -110,10 +110,10 @@ void iterate_short_queries(search_arguments const & arguments,
* @param queue Shopping cart queue for load balancing between Valik prefiltering and Stellar search.
* @param meta Metadata table for split query segments.
*/
template <typename ibf_t, typename thresh_t>
template <typename ibf_t>
void iterate_split_queries(search_arguments const & arguments,
ibf_t const & ibf,
thresh_t const & thresholder,
raptor::threshold::threshold const & thresholder,
cart_queue<shared_query_record<seqan2::String<seqan2::Dna>>> & queue,
metadata & meta)
{
Expand Down
4 changes: 2 additions & 2 deletions include/valik/search/prefilter_queries_parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ namespace valik::app
/**
* @brief Create parallel prefiltering jobs.
*/
template <typename query_t, seqan3::data_layout ibf_data_layout, typename thresh_t>
template <typename query_t, seqan3::data_layout ibf_data_layout>
inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data_layout> const & ibf,
search_arguments const & arguments,
std::vector<query_t> const & records,
thresh_t const & thresholder,
raptor::threshold::threshold const & thresholder,
cart_queue<query_t> & queue)
{
if (records.empty())
Expand Down
12 changes: 2 additions & 10 deletions include/valik/search/search_distributed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,8 @@ bool search_distributed(search_arguments const & arguments, search_time_statisti
}

auto start = std::chrono::high_resolution_clock::now();
if (arguments.manual_threshold)
{
valik::threshold::threshold const thresholder(arguments.threshold);
iterate_distributed_queries(arguments, index.ibf(), thresholder, queue);
}
else
{
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_distributed_queries(arguments, index.ibf(), thresholder, queue);
}
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_distributed_queries(arguments, index.ibf(), thresholder, queue);

queue.finish(); // Flush carts that are not empty yet
consumerThreads.clear();
Expand Down
24 changes: 4 additions & 20 deletions include/valik/search/search_local.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,29 +323,13 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
auto start = std::chrono::high_resolution_clock::now();
if constexpr (is_split)
{
if (arguments.manual_threshold)
{
valik::threshold::threshold const thresholder(arguments.threshold);
iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta);
}
else
{
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta);
}
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_split_queries(arguments, index.ibf(), thresholder, queue, *query_meta);
}
else
{
if (arguments.manual_threshold)
{
valik::threshold::threshold const thresholder(arguments.threshold);
iterate_short_queries(arguments, index.ibf(), thresholder, queue);
}
else
{
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_short_queries(arguments, index.ibf(), thresholder, queue);
}
raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()};
iterate_short_queries(arguments, index.ibf(), thresholder, queue);
}

queue.finish(); // Flush carts that are not empty yet
Expand Down
8 changes: 4 additions & 4 deletions include/valik/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ struct minimiser_threshold_arguments
double fpr{0.05};
uint8_t errors{0};
size_t pattern_size{};
bool treshold_was_set{false};
double threshold_percentage{std::numeric_limits<double>::quiet_NaN()};
bool threshold_was_set{false};
bool cache_thresholds{false};

protected:
Expand Down Expand Up @@ -124,6 +125,7 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste
uint8_t shape_size{shape.size()};
uint8_t shape_weight{shape.count()};
uint64_t overlap{};
size_t threshold{};

uint8_t threads{1u};

Expand All @@ -149,7 +151,7 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste
.shape{shape},
.query_length{pattern_size},
.errors{errors},
.percentage{std::numeric_limits<double>::quiet_NaN()},
.percentage{threshold_percentage},
.p_max{p_max},
.fpr{fpr},
.tau{tau},
Expand All @@ -159,8 +161,6 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste
}

float error_rate{};
size_t threshold{};
bool manual_threshold{false};
std::filesystem::path ref_meta_path{};
std::filesystem::path query_meta_path{};
bool distribute{false};
Expand Down
18 changes: 9 additions & 9 deletions src/argument_parsing/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ void run_search(sharg::parser & parser)
// ==========================================

sharg::input_file_validator{}(arguments.query_file);
arguments.treshold_was_set = parser.is_option_set("threshold");
if (parser.is_option_set("disableThresh") && parser.is_option_set("numMatches"))
{
if (arguments.numMatches > arguments.disableThresh)
Expand All @@ -217,14 +216,6 @@ void run_search(sharg::parser & parser)
arguments.bin_path = tmp.bin_path();
}

// ==========================================
// Process --threshold.
// ==========================================
if (parser.is_option_set("threshold"))
{
arguments.manual_threshold = true; // otherwise use raptor::threshold
}

// ==========================================
// Process --pattern.
// ==========================================
Expand All @@ -246,6 +237,15 @@ void run_search(sharg::parser & parser)
}
}

// ==========================================
// Process --threshold.
// ==========================================
if (parser.is_option_set("threshold"))
{
arguments.threshold_was_set = true; // use raptor::threshold_kinds::percentage
arguments.threshold_percentage = arguments.threshold / (double) (arguments.pattern_size - arguments.shape.size() + 1);
}

// ==========================================
// Create temporary file path for merging distributed Stellar runs.
// ==========================================
Expand Down
3 changes: 2 additions & 1 deletion src/threshold/find.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ void get_best_params(param_space const & space,
*/

std::cout << best_params.k << '\t' << best_params.t << '\t'
<< fn_rates[best_params.k - std::get<0>(space.kmer_range)][best_params.t - 1] << '\n';
<< fn_rates[best_params.k - std::get<0>(space.kmer_range)][best_params.t - 1] << '\t'
<< fp_rates[best_params.k - std::get<0>(space.kmer_range)] << '\n';
}

} // namespace valik
4 changes: 2 additions & 2 deletions src/valik_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ void valik_split(split_arguments & arguments)

std::cout << "db length: " << meta.total_len << "bp\n";
std::cout << "min local match length: " << arguments.overlap << "bp\n";
std::cout << "Recommended parameters for a chosen error rate:\n";
std::cout << "max_error_rate\tkmer_size\tthreshold\tFNR\n";
std::cout << "Recommended parameters depending on the chosen error rate\n\n";
std::cout << "max_error_rate\tkmer_size\tthreshold\tFNR\tFP_per_bin\n";
for (size_t errors{1}; errors <= std::round(arguments.overlap * 0.1); errors++)
{
std::cout.precision(3);
Expand Down

0 comments on commit 51421de

Please sign in to comment.