Skip to content

Commit

Permalink
Use shared pointers over underlying sequences and create copies of qu…
Browse files Browse the repository at this point in the history
…ery records
  • Loading branch information
eaasna committed Sep 7, 2023
1 parent 6c3146c commit f198265
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 238 deletions.
4 changes: 0 additions & 4 deletions include/valik/search/cart_query_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ inline bool get_cart_queries(rec_vec_t const & records,
std::set<TId> uniqueIds; // set of short IDs (cut at first whitespace)
bool idsUnique = true;

std::cout << "\nget_cart_queries\n";

size_t seqCount{0};
for (auto & record : records)
{
Expand All @@ -46,8 +44,6 @@ inline bool get_cart_queries(rec_vec_t const & records,
seqan2::appendValue(ids, query_id, seqan2::Generous());
seqCount++;
idsUnique &= stellar::_checkUniqueId(uniqueIds, (seqan2::String<char>) record.sequence_id);
//!ERROR: infix sequence is copied to seqs invalidating the pointers
std::cout << std::addressof(seqs[seqan2::length(seqs) - 1]) << '\t' << seqs[seqan2::length(seqs) - 1] << '\n';
}

strOut << "Loaded " << seqCount << " query sequence" << ((seqCount > 1) ? "s " : " ") << "from cart." << std::endl;
Expand Down
266 changes: 77 additions & 189 deletions include/valik/search/iterate_queries.hpp

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion include/valik/search/local_prefilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ void local_prefilter(
for (query_t const & record : records)
{
std::vector<seqan3::dna4> const & seq = record.sequence;
// seqan3::debug_stream << "Prefiltering sequence: \n";

// sequence can't contain local match if it's shorter than pattern length
if (seq.size() < arguments.pattern_size)
Expand Down
13 changes: 4 additions & 9 deletions include/valik/search/prefilter_queries_parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ namespace valik::app
template <typename query_t, seqan3::data_layout ibf_data_layout>
inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data_layout> const & ibf,
search_arguments const & arguments,
std::vector<query_t> & records,
std::vector<query_t> const & records,
raptor::threshold::threshold const & thresholder,
cart_queue<query_t> & queue)
{
if (records.empty())
return;

std::vector<std::jthread> tasks;
size_t const num_records = records.size();
size_t const records_per_thread = num_records / arguments.threads;
Expand All @@ -40,14 +43,6 @@ inline void prefilter_queries_parallel(seqan3::interleaved_bloom_filter<ibf_data
{
for (size_t const bin : bin_hits)
{
/*
seqan3::debug_stream << "Cart insertion of sequence: " << '\n';
for (auto & n : record.sequence)
seqan3::debug_stream << n;
seqan3::debug_stream << '\n';
*/
queue.insert(bin, record);
}
};
Expand Down
34 changes: 2 additions & 32 deletions include/valik/search/search_local.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,6 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
stellar::DatabaseIDMap<TAlphabet> databaseIDMap{databases, databaseIDs};
stellar::DatabaseIDMap<TAlphabet> reverseDatabaseIDMap{reverseDatabases, databaseIDs};

seqan2::StringSet<TSequence> underlyingQueries;
seqan2::StringSet<seqan2::CharString> underlyingQueryIDs;

TSize queryLen;
// import underlying query sequences (SPLIT)
stellar::stellar_runtime input_queries_time{};
bool const queriesSuccess = input_queries_time.measure_time([&]()
{
std::cout << "Finding all local alignments between two genomes...\n";
return stellar::_importAllSequences(arguments.query_file.c_str(), "query", underlyingQueries, underlyingQueryIDs, queryLen, std::cout, std::cerr);
});
if (!queriesSuccess)
return false;
time_statistics.reads_io_time += input_queries_time.milliseconds() / 1000;

bool error_in_search = false; // indicates if an error happened inside this lambda
auto consumerThreads = std::vector<std::jthread>{};
for (size_t threadNbr = 0; threadNbr < arguments.threads; ++threadNbr)
Expand Down Expand Up @@ -224,12 +209,6 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
thread_meta.text_out << std::endl; // swift filter output is on same line
stellar::_printDatabaseIdAndStellarKernelStatistics(threadOptions.verbose, databaseStrand, databaseID, statistics, thread_meta.text_out);

seqan3::debug_stream << "before consolidation\n";

for (auto & query_matches : forwardMatches)
seqan3::debug_stream << seqan2::length(query_matches.matches) << '\t';
seqan3::debug_stream << '\n';

stellarThreadTime.forward_strand_stellar_time.post_process_eps_matches_time.measure_time([&]()
{
// forwardMatches is an in-out parameter
Expand All @@ -238,12 +217,6 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
forwardMatches, disabledQueryIDs);
}); // measure_time

seqan3::debug_stream << "after consolidation\n";

for (auto & query_matches : forwardMatches)
seqan3::debug_stream << seqan2::length(query_matches.matches) << '\t';
seqan3::debug_stream << '\n';

// open output files
std::ofstream outputFile(threadOptions.outputFile.c_str(), ::std::ios_base::out);
if (!outputFile.is_open())
Expand Down Expand Up @@ -348,13 +321,10 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
});
}

std::vector<seqan2::Segment<seqan2::String<seqan2::Dna> const, seqan2::InfixSegment>> hostQueries;
std::vector<seqan2::CharString> hostQueryIDs;

if constexpr (is_split)
read_split_queries(underlyingQueries, underlyingQueryIDs, hostQueries, arguments, time_statistics, index.ibf(), queue, *query_segments);
iterate_split_queries(arguments, time_statistics, index.ibf(), queue, *query_segments);
else
read_short_queries(underlyingQueries, underlyingQueryIDs, hostQueries, arguments, time_statistics, index.ibf(), queue);
iterate_short_queries(arguments, time_statistics, index.ibf(), queue);

queue.finish(); // Flush carts that are not empty yet
consumerThreads.clear();
Expand Down
1 change: 1 addition & 0 deletions src/valik_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ void valik_search(search_arguments const & arguments)
{
search_time_statistics time_statistics{};

//!TODO: this switch doesn't work, make is_compressed template parameter
using index_structure_t = index_structure::ibf;
if (arguments.compressed)
using index_structure_t = index_structure::ibf_compressed;
Expand Down
4 changes: 2 additions & 2 deletions test/cli/dream_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ TEST_P(dream_search, shared_mem)
"--seg-meta", seg_meta_path);
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"});
EXPECT_EQ(result.err, std::string{});
EXPECT_EQ(result.err, std::string{"WARNING: Non-unique query ids. Output can be ambiguous.\n"});

auto distributed = valik::read_stellar_output(search_result_path(number_of_bins, window_size, number_of_errors), reference, std::ios::binary);
auto local = valik::read_stellar_output("search.gff", reference);
Expand Down Expand Up @@ -92,7 +92,7 @@ TEST_F(dream_search, no_matches)
"--seg-meta", data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt"));
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"});
EXPECT_EQ(result.err, std::string{});
EXPECT_EQ(result.err, std::string{}); // Stellar shortens query IDs until the first whitespace

auto actual = string_list_from_file("search.gff");

Expand Down

0 comments on commit f198265

Please sign in to comment.