From da25260aa01343730158bbd1d39fc4dc1a2b94f9 Mon Sep 17 00:00:00 2001 From: Evelin Aasna Date: Thu, 12 Dec 2024 14:01:13 +0100 Subject: [PATCH] Always distribute metagenome search (#135) --- include/valik/search/search_local.hpp | 8 +------- include/valik/shared.hpp | 1 - src/argument_parsing/search.cpp | 2 ++ test/cli/valik_options_test.cpp | 15 --------------- test/cli/valik_test.cpp | 1 - 5 files changed, 3 insertions(+), 24 deletions(-) diff --git a/include/valik/search/search_local.hpp b/include/valik/search/search_local.hpp index 49d308e7..f065d9a8 100644 --- a/include/valik/search/search_local.hpp +++ b/include/valik/search/search_local.hpp @@ -34,7 +34,6 @@ static inline dream_stellar::StellarOptions make_thread_options(search_arguments threadOptions.referenceLength = refLen; threadOptions.searchSegment = true; auto seg = ref_meta.segment_from_bin(bin_id); - //!TODO: deal with metagenome database threadOptions.binSequences.emplace_back(seg.seq_vec[0]); threadOptions.segmentBegin = seg.start; threadOptions.segmentEnd = seg.start + seg.len; @@ -66,10 +65,6 @@ static inline dream_stellar::StellarOptions make_thread_options(search_arguments template bool search_local(search_arguments & arguments, search_time_statistics & time_statistics) { - if (arguments.bin_path.size() > 1 || (arguments.bin_path.size() > 0 && arguments.bin_path[0].size() > 1)) - throw std::runtime_error("Multiple reference files can not be searched in shared memory mode. " - "Add --distribute argument to launch multiple distributed instances of DREAM-Stellar search."); - using index_structure_t = index_structure::ibf; auto index = valik_index{}; @@ -90,7 +85,7 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st auto prefilter_bin_count = ref_meta.seg_count; split_arguments stellar_dist_arguments; - // distribute stellar search + // stellar search without prefiltering // for some number of reference sequences split sequences into as many segments as is the next multiple of thread count if (ref_meta.seq_count % arguments.threads > 0) @@ -188,7 +183,6 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st bool const databasesSuccess = input_databases_time.measure_time([&]() { std::cout << "Launching stellar search on a shared memory machine...\n"; - //!TODO: allow metagenome database return dream_stellar::_importAllSequences(arguments.bin_path[0][0].c_str(), "database", databases, databaseIDs, refLen, std::cout, std::cerr); }); if (!databasesSuccess) diff --git a/include/valik/shared.hpp b/include/valik/shared.hpp index 4cb54666..8d0613b3 100644 --- a/include/valik/shared.hpp +++ b/include/valik/shared.hpp @@ -136,7 +136,6 @@ struct search_profile_arguments //!TODO: deduce this automatically bool split_query{false}; bool manual_parameters{false}; - //!TODO: make fourth option: MINIMISER search_kind search_type{search_kind::LEMMA}; double fnr; diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp index 0eb58ba4..fdeaa469 100644 --- a/src/argument_parsing/search.cpp +++ b/src/argument_parsing/search.cpp @@ -275,6 +275,8 @@ void run_search(sharg::parser & parser) arguments.shape_weight = arguments.shape.count(); arguments.window_size = tmp.window_size(); arguments.bin_path = tmp.bin_path(); + if (arguments.bin_path.size() > 1) + arguments.distribute = true; } // ========================================== diff --git a/test/cli/valik_options_test.cpp b/test/cli/valik_options_test.cpp index 3652395f..64ca43f8 100644 --- a/test/cli/valik_options_test.cpp +++ b/test/cli/valik_options_test.cpp @@ -307,18 +307,3 @@ TEST_F(argparse_search, not_manual_no_meta) EXPECT_EQ(result.out, std::string{}); EXPECT_EQ(result.err, std::string{"[Error] Provide --ref-meta to deduce suitable search parameters or set --without-parameter-tuning and --pattern size.\n"}); } - -TEST_F(argparse_search, shared_mem_metagenome) -{ - cli_test_result const result = execute_app("valik", "search", - "--query ", data("query.fq"), - "--index ", data("8bins19window.ibf"), - "--output search.gff", - "--ref-meta ", data("150overlap4bins.bin"), - "--pattern 100", - "--without-parameter-tuning"); - EXPECT_NE(result.exit_code, 0); - EXPECT_EQ(result.out, std::string{}); - EXPECT_EQ(result.err, std::string{"[Error] Multiple reference files can not be searched in shared memory mode. " - "Add --distribute argument to launch multiple distributed instances of DREAM-Stellar search.\n"}); -} diff --git a/test/cli/valik_test.cpp b/test/cli/valik_test.cpp index 2de1f137..c090673d 100644 --- a/test/cli/valik_test.cpp +++ b/test/cli/valik_test.cpp @@ -345,7 +345,6 @@ TEST_P(valik_search_clusters, search) cli_test_result const result = execute_app("valik", "search", "--output search.gff", - "--distribute", "--pattern", std::to_string(pattern_size), "--query-every", std::to_string(query_every), "--error-rate ", std::to_string(error_rate),