Skip to content

Commit

Permalink
Disable queries in repeats
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Sep 25, 2024
1 parent 54af274 commit db0dce5
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 21 deletions.
9 changes: 6 additions & 3 deletions include/valik/search/producer_threads_parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data
{
if (bin_hits.size() > std::max((size_t) 4, (size_t) std::round(ibf.bin_count() / 2.0)))
{
if (arguments.very_verbose)
verbose_out.write_record(record, bin_hits.size());
if (!arguments.keep_repeats)
{
verbose_out.write_disabled_record(record, bin_hits.size(), arguments.verbose);
return;
}
else if (arguments.verbose)
verbose_out.write_warning(record, bin_hits.size());
}

for (size_t const bin : bin_hits)
{
queue.insert(bin, record);
Expand Down
36 changes: 22 additions & 14 deletions include/valik/search/sync_out.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <fstream>
#include <mutex>

#include <seqan3/io/sequence_file/output.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/all.hpp>

Expand All @@ -12,6 +13,10 @@ namespace valik::app

class sync_out
{
using types = seqan3::type_list<seqan3::dna4_vector, std::string>;
using fields = seqan3::fields<seqan3::field::seq, seqan3::field::id>;
using sequence_record_type = seqan3::sequence_record<types, fields>;
using output_format = seqan3::type_list<seqan3::format_fasta>;

public:
sync_out() = default;
Expand All @@ -21,34 +26,37 @@ class sync_out
sync_out & operator=(sync_out &&) = default;
~sync_out() = default;

sync_out(std::filesystem::path const & path) : file{path} {}
// Constructs the sequence output `fout` from `path` and installs the callback
// stored in `warning_message`.
sync_out(std::filesystem::path const & path) : fout{path}
{
// Callback that prints an "Insufficient prefiltering" warning to
// seqan3::debug_stream, reporting the number of matching bins and the query length.
warning_message = [](size_t const bin_count, size_t const query_length)
{
seqan3::debug_stream << "[Warning] Insufficient prefiltering. " << bin_count <<
" bins match query of length " << query_length << '\n';
};
}

template <typename t>
void write_warning(t && query_record, size_t const & bin_count)
{
std::lock_guard<std::mutex> lock(write_mutex);
seqan3::debug_stream << "[Warning] Insufficient prefiltering. " << bin_count << " bins match query of length " << query_record.sequence.size() << '\n';
warning_message(bin_count, query_record.sequence.size());
}
// write_mutex is released when the lock_guard goes out of scope, i.e. as soon as the current thread exits the write function

template <typename t>
void write_record(t && query_record, size_t const & bin_count)
{
std::string fasta_string = ">";
fasta_string += query_record.sequence_id;
fasta_string += '\n';
for (auto & n : query_record.sequence)
fasta_string += seqan3::to_char(n);
fasta_string += '\n';

void write_disabled_record(t && query_record, size_t const & bin_count, bool const verbose)
{
std::lock_guard<std::mutex> lock(write_mutex);
seqan3::debug_stream << "[Warning] Insufficient prefiltering. " << bin_count << " bins match query:\n" << fasta_string << '\n';
if (verbose)
warning_message(bin_count, query_record.sequence.size());
sequence_record_type output_record{query_record.sequence, query_record.sequence_id};
fout.push_back(output_record);
}
// write_mutex is released when the lock_guard goes out of scope, i.e. as soon as the current thread exits the write function

private:
//seqan3::sequence_file_output<fields, output_format_types> fout;
std::ofstream file;
seqan3::sequence_file_output<fields, output_format> fout;
std::function<void(size_t, size_t)> warning_message;
std::mutex write_mutex;
};

Expand Down
2 changes: 1 addition & 1 deletion include/valik/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ struct search_arguments final : public minimiser_threshold_arguments, search_pro
bool write_time{false};
bool fast{false};
bool verbose{false};
bool very_verbose{false};
bool keep_repeats{false};

size_t cart_max_capacity{1000};
size_t max_queued_carts{std::numeric_limits<size_t>::max()};
Expand Down
6 changes: 3 additions & 3 deletions src/argument_parsing/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
.long_id = "without-parameter-tuning",
.description = "Preprocess database without setting default parameters.",
.advanced = true});
parser.add_flag(arguments.very_verbose,
parser.add_flag(arguments.keep_repeats,
sharg::config{.short_id = '\0',
.long_id = "very-verbose",
.description = "Print very verbose output.",
.long_id = "keep-repeats",
.description = "Do not filter out query matches from repeat regions.",
.advanced = true});
parser.add_option(arguments.seg_count_in,
sharg::config{.short_id = 'n',
Expand Down

0 comments on commit db0dce5

Please sign in to comment.