Skip to content

Commit

Permalink
Allow stellar only search
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Dec 4, 2024
1 parent d62bc5e commit 1956a77
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 64 deletions.
3 changes: 3 additions & 0 deletions include/valik/search/search_local.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st
metadata ref_meta = metadata(arguments.ref_meta_path);
if (stellar_only)
{
for (auto & f : ref_meta.files)
arguments.bin_path.push_back(std::vector<std::string>{f.path});

auto prefilter_bin_count = ref_meta.seg_count;
split_arguments stellar_dist_arguments;
// distribute stellar search
Expand Down
1 change: 1 addition & 0 deletions include/valik/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ struct search_arguments final : public minimiser_threshold_arguments, search_pro
bool verbose{false};
bool keep_best_repeats{false};
bool keep_all_repeats{false};
bool stellar_only{false};

size_t cart_max_capacity{1000};
size_t max_queued_carts{std::numeric_limits<size_t>::max()};
Expand Down
2 changes: 1 addition & 1 deletion lib/seqan
145 changes: 82 additions & 63 deletions src/argument_parsing/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
sharg::config{.short_id = '\0',
.long_id = "index",
.description = "Provide a valid path to an IBF.",
.required = true,
.validator = sharg::input_file_validator{}});
parser.add_option(arguments.query_file,
sharg::config{.short_id = '\0',
Expand Down Expand Up @@ -74,6 +73,11 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
/////////////////////////////////////////
// Advanced options
/////////////////////////////////////////
parser.add_flag(arguments.stellar_only,
sharg::config{.short_id = '\0',
.long_id = "stellar-only",
.description = "Do not prefilter before Stellar search.",
.advanced = true});
parser.add_flag(arguments.manual_parameters,
sharg::config{.short_id = '\0',
.long_id = "without-parameter-tuning",
Expand Down Expand Up @@ -244,19 +248,27 @@ void run_search(sharg::parser & parser)
throw sharg::validation_error{"Invalid parameter values: Please choose numMatches <= sortThresh.\n"};


// ==========================================
// Read window and kmer size, and the bin paths.
// ==========================================
if (!arguments.stellar_only && !parser.is_option_set("index"))
{
throw sharg::parser_error{"Option --index is required but not set."};
}

if (!arguments.stellar_only)
{
std::ifstream is{arguments.index_file.string(),std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
valik_index<> tmp{};
tmp.load_parameters(iarchive);
arguments.shape = tmp.shape();
arguments.shape_size = arguments.shape.size();
arguments.shape_weight = arguments.shape.count();
arguments.window_size = tmp.window_size();
arguments.bin_path = tmp.bin_path();
// ==========================================
// Read window and kmer size, and the bin paths.
// ==========================================
{
std::ifstream is{arguments.index_file.string(),std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
valik_index<> tmp{};
tmp.load_parameters(iarchive);
arguments.shape = tmp.shape();
arguments.shape_size = arguments.shape.size();
arguments.shape_weight = arguments.shape.count();
arguments.window_size = tmp.window_size();
arguments.bin_path = tmp.bin_path();
}
}

// ==========================================
Expand Down Expand Up @@ -286,7 +298,7 @@ void run_search(sharg::parser & parser)
// ==========================================
// Process manual threshold.
// ==========================================
if (parser.is_option_set("threshold"))
if (parser.is_option_set("threshold") && !arguments.stellar_only)
{
if (!arguments.manual_parameters)
{
Expand Down Expand Up @@ -317,71 +329,78 @@ void run_search(sharg::parser & parser)
}
}

// ==========================================
// Extract data from reference metadata.
// ==========================================
if (!arguments.manual_parameters)
if (!arguments.stellar_only)
{
if (!parser.is_option_set("ref-meta"))
throw sharg::validation_error("Provide --ref-meta to deduce suitable search parameters or set --without-parameter-tuning and --pattern size.");
// ==========================================
// Extract data from reference metadata.
// ==========================================
if (!arguments.manual_parameters)
{
if (!parser.is_option_set("ref-meta"))
throw sharg::validation_error("Provide --ref-meta to deduce suitable search parameters or set --without-parameter-tuning and --pattern size.");

std::filesystem::path search_profile_file{arguments.ref_meta_path};
search_profile_file.replace_extension("arg");
sharg::input_file_validator argument_input_validator{{"arg"}};
argument_input_validator(search_profile_file);
search_kmer_profile search_profile{search_profile_file};
std::filesystem::path search_profile_file{arguments.ref_meta_path};
search_profile_file.replace_extension("arg");
sharg::input_file_validator argument_input_validator{{"arg"}};
argument_input_validator(search_profile_file);
search_kmer_profile search_profile{search_profile_file};



arguments.pattern_size = search_profile.l;
arguments.errors = std::ceil(arguments.error_rate * arguments.pattern_size); // update based on pattern size in metadata
search_error_profile error_profile = search_profile.get_error_profile(arguments.errors);
// seg_count is inferred in metadata constructor
arguments.search_type = error_profile.search_type;
if (arguments.search_type == search_kind::STELLAR)

arguments.pattern_size = search_profile.l;
arguments.errors = std::ceil(arguments.error_rate * arguments.pattern_size); // update based on pattern size in metadata
search_error_profile error_profile = search_profile.get_error_profile(arguments.errors);
// seg_count is inferred in metadata constructor
arguments.search_type = error_profile.search_type;
if (arguments.search_type == search_kind::STELLAR)
{
std::cout << "Can not prefilter matches of length " << std::to_string(error_profile.pattern.l) <<
" with " << std::to_string(error_profile.pattern.e) << " errors. Searching without prefiltering.\n";
arguments.fnr = 0.0;
}
else
{
arguments.max_segment_len = max_segment_len(error_profile.fp_per_pattern, arguments.pattern_size, arguments.query_every);
arguments.threshold = error_profile.params.t;
arguments.threshold_was_set = true; // use raptor::threshold_kinds::percentage
if (arguments.threshold > arguments.pattern_size - arguments.shape.size() + 1)
throw sharg::validation_error("Threshold can not be larger than the number of k-mers per pattern.");
arguments.threshold_percentage = arguments.threshold / (double) (arguments.pattern_size - arguments.shape.size() + 1);
arguments.fnr = error_profile.fnr;
}

if (arguments.window_size > arguments.shape_size)
arguments.search_type = search_kind::MINIMISER;
}

// ==========================================
// More checks.
// ==========================================
if (parser.is_option_set("query-every"))
{
std::cout << "Can not prefilter matches of length " << std::to_string(error_profile.pattern.l) <<
" with " << std::to_string(error_profile.pattern.e) << " errors. Searching without prefiltering.\n";
arguments.fnr = 0.0;
if (arguments.query_every > arguments.pattern_size)
throw sharg::validation_error("Reduce --query-every so that all positions in the query would be considered at least once.");
}
else
{
arguments.max_segment_len = max_segment_len(error_profile.fp_per_pattern, arguments.pattern_size, arguments.query_every);
arguments.threshold = error_profile.params.t;
arguments.threshold_was_set = true; // use raptor::threshold_kinds::percentage
if (arguments.threshold > arguments.pattern_size - arguments.shape.size() + 1)
throw sharg::validation_error("Threshold can not be larger than the number of k-mers per pattern.");
arguments.threshold_percentage = arguments.threshold / (double) (arguments.pattern_size - arguments.shape.size() + 1);
arguments.fnr = error_profile.fnr;
if (arguments.fast)
arguments.query_every = 3;
}

if (arguments.window_size > arguments.shape_size)
arguments.search_type = search_kind::MINIMISER;
}
// ==========================================
// Set strict thresholding parameters for fast mode.
// ==========================================
if (!parser.is_option_set("tau") && arguments.fast)
arguments.tau = 0.99999;

// ==========================================
// More checks.
// ==========================================
if (parser.is_option_set("query-every"))
{
if (arguments.query_every > arguments.pattern_size)
throw sharg::validation_error("Reduce --query-every so that all positions in the query would be considered at least once.");
if (!parser.is_option_set("p_max") && arguments.fast)
arguments.p_max = 0.05;
}
else
{
if (arguments.fast)
arguments.query_every = 3;
arguments.search_type = search_kind::STELLAR;
}

// ==========================================
// Set strict thresholding parameters for fast mode.
// ==========================================
if (!parser.is_option_set("tau") && arguments.fast)
arguments.tau = 0.99999;

if (!parser.is_option_set("p_max") && arguments.fast)
arguments.p_max = 0.05;

// ==========================================
// Dispatch
// ==========================================
Expand Down

0 comments on commit 1956a77

Please sign in to comment.