From 1f6509e160a9039fe43237920742eca1314aec75 Mon Sep 17 00:00:00 2001
From: Evelin Aasna
Date: Wed, 10 Apr 2024 17:08:46 +0200
Subject: [PATCH] Find IBF bin size for a genome minimiser set

---
 include/utilities/threshold/basics.hpp |  2 +-
 include/valik/search/search_local.hpp  |  3 +-
 src/argument_parsing/build.cpp         | 52 +++++++++++++-------------
 src/argument_parsing/search.cpp        |  3 ++
 src/prepare/compute_bin_size.cpp       |  1 +
 5 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/include/utilities/threshold/basics.hpp b/include/utilities/threshold/basics.hpp
index 8969e3bb..98682b22 100644
--- a/include/utilities/threshold/basics.hpp
+++ b/include/utilities/threshold/basics.hpp
@@ -107,6 +107,6 @@ inline uint64_t combinations(size_t const k, size_t const n)
     return 0;
 }
 
-enum class search_kind {LEMMA, HEURISTIC, STELLAR};
+enum class search_kind {LEMMA, HEURISTIC, MINIMISER, STELLAR};
 
 } //namespace valik
diff --git a/include/valik/search/search_local.hpp b/include/valik/search/search_local.hpp
index 39c3c1bf..00677207 100644
--- a/include/valik/search/search_local.hpp
+++ b/include/valik/search/search_local.hpp
@@ -100,10 +100,11 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st
 
     std::cout << "\n-----------Search parameters-----------\n";
     std::cout << "kmer size " << std::to_string(arguments.shape_size) << '\n';
+    std::cout << "window size " << std::to_string(arguments.window_size) << '\n';
     switch (arguments.search_type)
     {
         case search_kind::LEMMA: std::cout << "k-mer lemma "; break;
-        //case search_kind::MINIMISER: std::cout << "minimiser "; break;
+        case search_kind::MINIMISER: std::cout << "minimiser "; break;
         case search_kind::HEURISTIC: std::cout << "heuristic "; break;
         default: break;
     }
diff --git a/src/argument_parsing/build.cpp b/src/argument_parsing/build.cpp
index 656d8139..d199c38f 100644
--- a/src/argument_parsing/build.cpp
+++ b/src/argument_parsing/build.cpp
@@ -112,32 +112,6 @@ void run_build(sharg::parser & parser)
         arguments.window_size = search_profile.k;
     }
 
-    // ==========================================
-    // Various checks.
-    // ==========================================
-    if (parser.is_option_set("kmer-count-cutoff") && parser.is_option_set("use-filesize-dependent-cutoff"))
-        throw sharg::parser_error{"You cannot use both --kmer-count-cutoff and --use-filesize-dependent-cutoff."};
-
-    if (parser.is_option_set("window"))
-    {
-        if (arguments.kmer_size > arguments.window_size)
-            throw sharg::parser_error{"The k-mer size cannot be bigger than the window size."};
-    }
-    else
-    {
-        if (arguments.fast)
-        {
-            arguments.window_size = arguments.kmer_size + 2;
-            raptor::compute_minimiser(arguments);
-            arguments.input_is_minimiser = true;
-        }
-        else
-            arguments.window_size = arguments.kmer_size;
-    }
-
-    arguments.shape = seqan3::shape{seqan3::ungapped{arguments.kmer_size}};
-    arguments.shape_weight = arguments.shape.count();
-
     // ==========================================
     // Process bin_path:
     // if building from clustered sequences each line in input corresponds to a bin
@@ -167,6 +141,32 @@ void run_build(sharg::parser & parser)
         }
     }
 
+    // ==========================================
+    // Process minimiser parameters for IBF size calculation.
+    // ==========================================
+    if (parser.is_option_set("kmer-count-cutoff") && parser.is_option_set("use-filesize-dependent-cutoff"))
+        throw sharg::parser_error{"You cannot use both --kmer-count-cutoff and --use-filesize-dependent-cutoff."};
+
+    arguments.shape = seqan3::shape{seqan3::ungapped{arguments.kmer_size}};
+    arguments.shape_weight = arguments.shape.count();
+
+    if (parser.is_option_set("window"))
+    {
+        if (arguments.kmer_size > arguments.window_size)
+            throw sharg::parser_error{"The k-mer size cannot be bigger than the window size."};
+    }
+    else
+    {
+        if (arguments.fast)
+        {
+            arguments.window_size = arguments.kmer_size + 2;
+            raptor::compute_minimiser(arguments);
+            arguments.input_is_minimiser = true;
+        }
+        else
+            arguments.window_size = arguments.kmer_size;
+    }
+
     try
     {
         sharg::output_file_validator{sharg::output_file_open_options::open_or_create}(arguments.out_path);
diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp
index 7be85dc1..6a56d5c5 100644
--- a/src/argument_parsing/search.cpp
+++ b/src/argument_parsing/search.cpp
@@ -341,6 +341,9 @@ void run_search(sharg::parser & parser)
             arguments.threshold_percentage = arguments.threshold / (double) (arguments.pattern_size - arguments.shape.size() + 1);
             arguments.fnr = error_profile.fnr;
         }
+
+        if (arguments.window_size > arguments.shape_size)
+            arguments.search_type = search_kind::MINIMISER;
     }
 
     // ==========================================
diff --git a/src/prepare/compute_bin_size.cpp b/src/prepare/compute_bin_size.cpp
index 598802e8..404b7de5 100644
--- a/src/prepare/compute_bin_size.cpp
+++ b/src/prepare/compute_bin_size.cpp
@@ -8,6 +8,7 @@
  */
 
 #include
+#include
 
 namespace raptor
 {