Skip to content

Commit

Permalink
Try to replace stellar matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Jul 3, 2024
1 parent 0cc3495 commit bb362e0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 21 deletions.
37 changes: 19 additions & 18 deletions include/valik/search/iterate_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,37 +55,38 @@ struct adaptor_traits : seqan3::sequence_file_input_default_traits_dna
* @param arguments Command line arguments.
* @param queue Shopping cart queue for sending queries over to Stellar search.
*/
template <typename TSequence>
template <typename sequence_t>
void iterate_all_queries(size_t const ref_seg_count,
search_arguments const & arguments,
cart_queue<shared_query_record<TSequence>> & queue)
{
seqan3::sequence_file_input<adaptor_traits> fin{std::istringstream{arguments.query_file}, seqan3::format_fasta{}};
using record_type = typename decltype(fin)::record_type;

using sequence_type = std::remove_cvref_t<decltype(std::declval<record_type>().sequence())>;
std::vector<sequence_type> rec_vec{};
for (auto & record : fin)
cart_queue<shared_query_record<sequence_t>> & queue)
{
/*
std::vector<sequence_type> seq_vec{};
for (auto & seq : rec_vec | std::views::transform([](record_type record) { return record.sequence(); }))
{
rec_vec.emplace_back(record.sequence());
seq_vec.emplace_back(seq);
}
jst::contrib::stellar_matcher<sequence_type> matcher(rec_vec, (double) arguments.error_rate, (unsigned) arguments.minLength);
jst::contrib::stellar_matcher<sequence_type> matcher(seq_vec, (double) arguments.error_rate, (unsigned) arguments.minLength);
*/

using TId = seqan2::CharString;
std::vector<shared_query_record<TSequence>> query_records{};
std::vector<shared_query_record<sequence_t>> query_records{};
constexpr uint64_t chunk_size = (1ULL << 20) * 10;

seqan2::SeqFileIn inSeqs;
if (!open(inSeqs, arguments.query_file.c_str()))
{
throw std::runtime_error("Failed to open " + arguments.query_file.string() + " file.");
}
seqan3::sequence_file_input<adaptor_traits> fin{std::istringstream{arguments.query_file}, seqan3::format_fasta{}};
using record_type = typename decltype(fin)::record_type;
using sequence_type = std::remove_cvref_t<decltype(std::declval<record_type>().sequence())>;

std::set<TId> uniqueIds; // set of short IDs (cut at first whitespace)
bool idsUnique = true;

size_t seqCount{0};
std::vector<record_type> rec_vec{};
for (auto & record : fin)
{
rec_vec.emplace_back(record);
}

for (; !atEnd(inSeqs); ++seqCount)
{
TSequence seq{};
Expand Down
2 changes: 1 addition & 1 deletion include/valik/search/query_record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ template <typename TSequence>
struct shared_query_record
{
std::string sequence_id;
std::vector<seqan3::dna4> sequence;
std::vector<seqan2::alphabet_adaptor<seqan3::dna4>> sequence;
seqan2::Segment<TSequence const, seqan2::InfixSegment> querySegment;
std::shared_ptr<TSequence> underlyingData;

Expand Down
5 changes: 3 additions & 2 deletions include/valik/search/search_local.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ bool search_local(search_arguments & arguments, search_time_statistics & time_st
}
}

using TAlphabet = seqan2::Dna;
using TSequence = seqan2::String<TAlphabet>;
using TAlphabet = seqan2::alphabet_adaptor<seqan3::dna4>;
using TSequence = std::vector<TAlphabet>;

// the queue hands records over from the producer threads (valik prefiltering) to the consumer threads (stellar search)
auto queue = cart_queue<shared_query_record<TSequence>>{ref_meta.seg_count, arguments.cart_max_capacity, arguments.max_queued_carts};

Expand Down

0 comments on commit bb362e0

Please sign in to comment.