Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Keep all or best repeats
Browse files: browse the repository at this point in the history.
eaasna committed Oct 7, 2024
1 parent fbd6199 commit 277cc12
Showing 3 changed files with 18 additions and 5 deletions.
9 changes: 8 additions & 1 deletion include/valik/search/producer_threads_parallel.hpp
Original file line number Diff line number Diff line change
@@ -51,7 +51,7 @@ inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data
{
if (arguments.verbose)
verbose_out.write_warning(record, bin_hits.size());
if (arguments.keep_repeats) // keep bin hits that are supported by the most patterns per query segment
if (arguments.keep_best_repeats) // keep bin hits that are supported by the most patterns per query segment
{
size_t mean_bin_support = std::max((size_t) 2, (size_t) std::round((double) total_pattern_hits / (double) bin_hits.size()));
for (auto const [bin, count] : bin_hits)
@@ -60,6 +60,13 @@ inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data
queue.insert(bin, record);
}
}
else if (arguments.keep_all_repeats)
{
for (auto const [bin, count] : bin_hits)
{
queue.insert(bin, record);
}
}
return;
}

3 changes: 2 additions & 1 deletion include/valik/shared.hpp
Original file line number Diff line number Diff line change
@@ -181,7 +181,8 @@ struct search_arguments final : public minimiser_threshold_arguments, search_pro
bool write_time{false};
bool fast{false};
bool verbose{false};
bool keep_repeats{false};
bool keep_best_repeats{false};
bool keep_all_repeats{false};

size_t cart_max_capacity{1000};
size_t max_queued_carts{std::numeric_limits<size_t>::max()};
11 changes: 8 additions & 3 deletions src/argument_parsing/search.cpp
Original file line number Diff line number Diff line change
@@ -79,10 +79,15 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
.long_id = "without-parameter-tuning",
.description = "Preprocess database without setting default parameters.",
.advanced = true});
parser.add_flag(arguments.keep_repeats,
parser.add_flag(arguments.keep_best_repeats,
sharg::config{.short_id = '\0',
.long_id = "keep-repeats",
.description = "Do not filter out query matches from repeat regions.",
.long_id = "keep-best-repeats",
.description = "Find only highest similarity matches for repeat regions.",
.advanced = true});
parser.add_flag(arguments.keep_all_repeats,
sharg::config{.short_id = '\0',
.long_id = "keep-all-repeats",
.description = "Do not filter out query matches from repeat regions. This may significantly increase the runtime.",
.advanced = true});
parser.add_option(arguments.seg_count_in,
sharg::config{.short_id = 'n',

0 comments on commit 277cc12

Please sign in to comment.