Skip to content

Commit

Permalink
Call Stellar search functions (#90)
Browse files Browse the repository at this point in the history
* Add stellar3 submodule

* Add seqan2 submodule

* Update lib/stellar3 (seqan2 namespace)

* Load Stellar3 database sequences

* Load database sequences from all reference files

* Add flag to toggle shared memory vs distributed (default) execution

* Each thread imports cart query sequences

* sharg::parser instead of seqan3::argument_parser

* Workaround for g++-10 template deduction

* [FIX] error handling; lambda with implicit void return type

* Environment variable parsing as struct

* Suppress stellar3 warnings

* Overwrite (not append) stellar search output files

* Compile stellar diagnostics

* Stream stellar diagnostics

* Share databaseIDMap between threads

* Test data for DREAM-Stellar CLI tests

* [FIX] empty output when no valik matches found

* [TEST] DREAM-Stellar output shared memory vs distributed

* [FIX] always write out matches

* Compare DREAM-Stellar matches

* Launch Stellar3 search

* Calculate Stellar qGram length

* Refactor search time printing

* Bump stellar version

* Stellar search reverse strand

* Update CLI test suite after raptor_data_simulation update

* Delete stellar_call.hpp

* Apply suggestions from code review

* Bump lib/stellar3 version
  • Loading branch information
eaasna authored Jul 4, 2023
1 parent a36c697 commit 65b091d
Show file tree
Hide file tree
Showing 62 changed files with 7,060 additions and 1,381 deletions.
9 changes: 9 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,12 @@
[submodule "lib/raptor_data_simulation"]
path = lib/raptor_data_simulation
url = git@github.com:eaasna/raptor_data_simulation.git
[submodule "lib/stellar3"]
path = lib/stellar3
url = git@github.com:seqan/stellar3.git
[submodule "lib/seqan"]
path = lib/seqan
url = git@github.com:seqan/seqan.git
[submodule "lib/sharg"]
path = lib/sharg
url = https://github.com/seqan/sharg-parser.git
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ list (APPEND CMAKE_MODULE_PATH "${SEQAN3_CLONE_DIR}/test/cmake/")
list (APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/test/cmake/")

# Use ccache.
include (seqan_require_ccache)
seqan_require_ccache ()
include (seqan3_require_ccache)
seqan3_require_ccache ()

# Dependency: Sharg.
find_package (Sharg REQUIRED PATHS lib/sharg/build_system)

# Add the application.
add_subdirectory (src)
Expand Down
4 changes: 2 additions & 2 deletions include/utilities/argument_parsing/consolidate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace valik::app
{

void init_consolidation_parser(seqan3::argument_parser & parser, consolidation_arguments & arguments);
void run_consolidate(seqan3::argument_parser & parser);
void init_consolidation_parser(sharg::parser & parser, consolidation_arguments & arguments);
void run_consolidate(sharg::parser & parser);

} // namespace valik::app
2 changes: 1 addition & 1 deletion include/utilities/consolidate/stellar_match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ struct stellar_match
}
}

std::string to_string()
std::string to_string() const
{
std::string match_str = dname;
match_str += "\tStellar\teps-matches\t";
Expand Down
4 changes: 2 additions & 2 deletions include/valik/argument_parsing/build.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace valik::app
{

void init_build_parser(seqan3::argument_parser & parser, build_arguments & arguments);
void run_build(seqan3::argument_parser & parser);
void init_build_parser(sharg::parser & parser, build_arguments & arguments);
void run_build(sharg::parser & parser);

} // namespace valik::app
4 changes: 2 additions & 2 deletions include/valik/argument_parsing/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace valik::app
{

void init_search_parser(seqan3::argument_parser & parser, search_arguments & arguments);
void run_search(seqan3::argument_parser & parser);
void init_search_parser(sharg::parser & parser, search_arguments & arguments);
void run_search(sharg::parser & parser);

} // namespace valik::app
17 changes: 2 additions & 15 deletions include/valik/argument_parsing/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,7 @@
namespace valik::app
{

void init_shared_meta(seqan3::argument_parser & parser);
void try_parsing(seqan3::argument_parser & parser);

template <typename arguments_t>
void init_shared_options(seqan3::argument_parser & parser, arguments_t & arguments)
{
static_assert(std::same_as<arguments_t, build_arguments> || std::same_as<arguments_t, search_arguments>);

parser.add_option(arguments.threads,
'\0',
"threads",
"Choose the number of threads.",
seqan3::option_spec::standard,
positive_integer_validator{});
}
void init_shared_meta(sharg::parser & parser);
void try_parsing(sharg::parser & parser);

} // namespace valik::app
4 changes: 2 additions & 2 deletions include/valik/argument_parsing/split.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace valik::app
{

void init_split_parser(seqan3::argument_parser & parser, split_arguments & arguments);
void run_split(seqan3::argument_parser & parser);
void init_split_parser(sharg::parser & parser, split_arguments & arguments);
void run_split(sharg::parser & parser);

} // namespace valik::app
2 changes: 1 addition & 1 deletion include/valik/argument_parsing/top_level.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
namespace valik::app
{

void init_top_level_parser(seqan3::argument_parser & parser);
void init_top_level_parser(sharg::parser & parser);

} // namespace valik::app
18 changes: 9 additions & 9 deletions include/valik/argument_parsing/validators.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include <seqan3/argument_parser/all.hpp>
#include <sharg/all.hpp>
#include <seqan3/io/sequence_file/input.hpp>

namespace valik::app
Expand All @@ -13,7 +13,7 @@ struct power_of_two_validator
void operator() (option_value_type const & val) const
{
if (!std::has_single_bit(val))
throw seqan3::validation_error{"The value must be a power of two."};
throw sharg::validation_error{"The value must be a power of two."};
}

static std::string get_help_page_message ()
Expand All @@ -39,7 +39,7 @@ class positive_integer_validator
void operator()(option_value_type const & val) const
{
if (!is_zero_positive && !val)
throw seqan3::validation_error{"The value must be a positive integer."};
throw sharg::validation_error{"The value must be a positive integer."};
}

std::string get_help_page_message () const
Expand Down Expand Up @@ -71,7 +71,7 @@ class size_validator
void operator()(option_value_type const & cmp) const
{
if (!std::regex_match(cmp, expression))
throw seqan3::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")};
throw sharg::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")};
}

template <std::ranges::forward_range range_type>
Expand Down Expand Up @@ -105,15 +105,15 @@ class bin_validator
void operator() (option_value_type const & values) const
{
if (values.empty())
throw seqan3::validation_error{"The list of input files cannot be empty."};
throw sharg::validation_error{"The list of input files cannot be empty."};

for (auto && value : values)
{
try
{
sequence_file_validator(value);
}
catch (seqan3::validation_error const & exception)
catch (sharg::validation_error const & exception)
{
if (value.extension() == ".minimiser")
minimiser_file_validator(value);
Expand Down Expand Up @@ -142,7 +142,7 @@ class bin_validator

for (auto && value : values)
if (is_minimiser_input != (value.extension() == ".minimiser"))
throw seqan3::validation_error{"You cannot mix sequence and minimiser files as input."};
throw sharg::validation_error{"You cannot mix sequence and minimiser files as input."};
}

std::string get_help_page_message() const
Expand Down Expand Up @@ -184,10 +184,10 @@ class bin_validator
}
return result;
}()};
seqan3::input_file_validator<> minimiser_file_validator{{"minimiser"}};
sharg::input_file_validator minimiser_file_validator{{"minimiser"}};

public:
seqan3::input_file_validator<> sequence_file_validator{{combined_extensions}};
sharg::input_file_validator sequence_file_validator{{combined_extensions}};
};

} // namespace valik::app
12 changes: 6 additions & 6 deletions include/valik/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#pragma once

#include <seqan3/argument_parser/exceptions.hpp>
#include <sharg/exceptions.hpp>
#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>

#include <../lib/raptor/include/raptor/hierarchical_interleaved_bloom_filter.hpp>
Expand Down Expand Up @@ -155,19 +155,19 @@ class valik_index
if ((data_layout_mode == seqan3::data_layout::compressed && !compressed_) ||
(data_layout_mode == seqan3::data_layout::uncompressed && compressed_))
{
throw seqan3::argument_parser_error{"Data layouts of serialised and specified index differ."};
throw sharg::validation_error{"Data layouts of serialised and specified index differ."};
}
archive(bin_path_);
archive(ibf_);
}
catch (std::exception const & e)
{
throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}};
throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}};
}
}
else
{
throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE
throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE
}
}

Expand Down Expand Up @@ -195,13 +195,13 @@ class valik_index
// GCOVR_EXCL_START
catch (std::exception const & e)
{
throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}};
throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}};
}
// GCOVR_EXCL_STOP
}
else
{
throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE
throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE
}
}
//!\endcond
Expand Down
71 changes: 71 additions & 0 deletions include/valik/search/env_var_pack.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#pragma once

#include <unistd.h>

#include <algorithm>
#include <cstdlib>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <vector>

namespace valik
{

/**
 * \brief Locations and executable names used by the search, optionally overridden via environment variables.
 *
 * Environment variables read by the constructor:
 *  - VALIK_TMP:     directory for temporary files. Must already exist, be a directory and be writable.
 *                   When unset, a new directory is created below the temporary path of the OS.
 *  - VALIK_STELLAR: replaces the default value of `stellar_exec` ("stellar").
 *  - VALIK_MERGE:   replaces the default value of `merge_exec` ("cat").
 */
struct env_var_pack
{
    std::filesystem::path tmp_path;
    std::string stellar_exec{"stellar"};
    std::string merge_exec{"cat"};

    /**
     * \brief Reads the environment and fills in all members.
     *
     * \throws std::runtime_error if $VALIK_TMP is set but does not name an existing, writable directory,
     *         or if the fallback temporary directory cannot be created.
     */
    env_var_pack()
    {
        // the location of bin-query fasta files can be overridden with an environment variable
        if (char const * const env_tmp = std::getenv("VALIK_TMP"); env_tmp != nullptr)
        {
            tmp_path = std::string(env_tmp);
            std::filesystem::file_status const tmp_status = std::filesystem::status(tmp_path);

            // Ask the OS whether *this process* may write to the directory. Looking at the permission bits
            // alone would also accept a directory that is only writable by some other user or group.
            bool const is_writable = (::access(tmp_path.c_str(), W_OK) == 0);

            if (!exists(tmp_status) || !is_directory(tmp_status) || !is_writable)
                throw std::runtime_error("Directory $VALIK_TMP=" + std::string(env_tmp) + " must exist and write permission must be granted");
        }
        else
        {
            tmp_path = create_temporary_path("valik/stellar_call_XXXXXX");
        }

        if (char const * const env_stellar = std::getenv("VALIK_STELLAR"); env_stellar != nullptr)
            stellar_exec = std::string(env_stellar);

        if (char const * const env_merge = std::getenv("VALIK_MERGE"); env_merge != nullptr)
            merge_exec = std::string(env_merge);
    }

    /* Creates a temporary folder in the temporary path of the OS
     *
     * \param name: a relative name with 'XXXXXX' at the end, e.g.: valik/call_XXXXXX
     * \return returns the full path with the 'XXXXXX' replaced; the directory has been created
     *
     * throws std::runtime_error if `name` is not relative or the directory could not be created
     */
    static std::filesystem::path create_temporary_path(std::filesystem::path name)
    {
        if (!name.is_relative())
            throw std::runtime_error("Must be given a relative file");

        std::filesystem::path const path_template = std::filesystem::temp_directory_path() / name;
        create_directories(path_template.parent_path());

        // mkdtemp rewrites its argument in place, so hand it a mutable, NUL-terminated copy of the template.
        auto const & native_template = path_template.native();
        std::vector<char> buffer(native_template.begin(), native_template.end());
        buffer.push_back('\0');

        if (mkdtemp(buffer.data()) == nullptr)
            throw std::runtime_error("Could not create temporary folder: " + native_template);

        return std::filesystem::path{buffer.data()};
    }
};

}
19 changes: 11 additions & 8 deletions include/valik/search/search_time_statistics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,25 @@ struct search_time_statistics
}
};

inline void write_time_statistics(search_time_statistics const & time_statistics, search_arguments const & arguments)
inline void write_time_statistics(search_time_statistics const & time_statistics, std::string const & time_file)
{
std::filesystem::path file_path{arguments.out_file};
file_path += ".time";
std::filesystem::path file_path{time_file};
std::ofstream file_handle(file_path, std::ofstream::app);

file_handle << "IBF I/O\tReads I/O\tPrefilter\tMin cart time\tAvg cart time\tMax cart time\tNr carts\n";
file_handle << std::fixed
<< std::setprecision(2)
<< time_statistics.index_io_time << '\t'
<< time_statistics.reads_io_time << '\t'
<< time_statistics.prefilter_time << '\t'
<< time_statistics.get_cart_min() << '\t'
<< time_statistics.get_cart_avg() << '\t'
<< time_statistics.get_cart_max() << '\t'
<< time_statistics.cart_processing_times.size() << '\n';
<< time_statistics.prefilter_time << '\t';
if (!time_statistics.cart_processing_times.empty())
{
file_handle << time_statistics.get_cart_min() << '\t'
<< time_statistics.get_cart_avg() << '\t'
<< time_statistics.get_cart_max() << '\t'
<< time_statistics.cart_processing_times.size() << '\n';

}

}

Expand Down
Loading

0 comments on commit 65b091d

Please sign in to comment.