diff --git a/.gitmodules b/.gitmodules index aa4c926d..bf13091e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -11,3 +11,12 @@ [submodule "lib/raptor_data_simulation"] path = lib/raptor_data_simulation url = git@github.com:eaasna/raptor_data_simulation.git +[submodule "lib/stellar3"] + path = lib/stellar3 + url = git@github.com:seqan/stellar3.git +[submodule "lib/seqan"] + path = lib/seqan + url = git@github.com:seqan/seqan.git +[submodule "lib/sharg"] + path = lib/sharg + url = https://github.com/seqan/sharg-parser.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c2d3eae..c3934893 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,11 @@ list (APPEND CMAKE_MODULE_PATH "${SEQAN3_CLONE_DIR}/test/cmake/") list (APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/test/cmake/") # Use ccache. -include (seqan_require_ccache) -seqan_require_ccache () +include (seqan3_require_ccache) +seqan3_require_ccache () + +# Dependency: Sharg. +find_package (Sharg REQUIRED PATHS lib/sharg/build_system) # Add the application. add_subdirectory (src) diff --git a/include/utilities/argument_parsing/consolidate.hpp b/include/utilities/argument_parsing/consolidate.hpp index 585bda78..0c74d49e 100644 --- a/include/utilities/argument_parsing/consolidate.hpp +++ b/include/utilities/argument_parsing/consolidate.hpp @@ -6,7 +6,7 @@ namespace valik::app { -void init_consolidation_parser(seqan3::argument_parser & parser, consolidation_arguments & arguments); -void run_consolidate(seqan3::argument_parser & parser); +void init_consolidation_parser(sharg::parser & parser, consolidation_arguments & arguments); +void run_consolidate(sharg::parser & parser); } // namespace valik::app diff --git a/include/utilities/consolidate/stellar_match.hpp b/include/utilities/consolidate/stellar_match.hpp index 6c453f03..7f7fdec7 100644 --- a/include/utilities/consolidate/stellar_match.hpp +++ b/include/utilities/consolidate/stellar_match.hpp @@ -64,7 +64,7 @@ struct stellar_match } } - std::string to_string() + std::string to_string() const { std::string match_str = dname; match_str += "\tStellar\teps-matches\t"; diff --git a/include/valik/argument_parsing/build.hpp b/include/valik/argument_parsing/build.hpp index 6df5a87b..9a363844 100644 --- a/include/valik/argument_parsing/build.hpp +++ b/include/valik/argument_parsing/build.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_build_parser(seqan3::argument_parser & parser, build_arguments & arguments); -void run_build(seqan3::argument_parser & parser); +void init_build_parser(sharg::parser & parser, build_arguments & arguments); +void run_build(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/search.hpp b/include/valik/argument_parsing/search.hpp index 916e3833..38ddbee2 100644 --- a/include/valik/argument_parsing/search.hpp +++ b/include/valik/argument_parsing/search.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_search_parser(seqan3::argument_parser & parser, search_arguments & arguments); -void run_search(seqan3::argument_parser & parser); +void init_search_parser(sharg::parser & parser, search_arguments & arguments); +void run_search(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/shared.hpp b/include/valik/argument_parsing/shared.hpp index 3427129e..bb10ebd7 100644 --- a/include/valik/argument_parsing/shared.hpp +++ b/include/valik/argument_parsing/shared.hpp @@ -6,20 +6,7 @@ namespace valik::app { -void init_shared_meta(seqan3::argument_parser & parser); -void try_parsing(seqan3::argument_parser & parser); - -template -void init_shared_options(seqan3::argument_parser & parser, arguments_t & arguments) -{ - static_assert(std::same_as || std::same_as); - - parser.add_option(arguments.threads, - '\0', - "threads", - "Choose the number of threads.", - seqan3::option_spec::standard, - positive_integer_validator{}); -} +void init_shared_meta(sharg::parser & parser); +void try_parsing(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/split.hpp b/include/valik/argument_parsing/split.hpp index 17dedd53..393936d8 100644 --- a/include/valik/argument_parsing/split.hpp +++ b/include/valik/argument_parsing/split.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_split_parser(seqan3::argument_parser & parser, split_arguments & arguments); -void run_split(seqan3::argument_parser & parser); +void init_split_parser(sharg::parser & parser, split_arguments & arguments); +void run_split(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/top_level.hpp b/include/valik/argument_parsing/top_level.hpp index eb4766e6..b0937486 100644 --- a/include/valik/argument_parsing/top_level.hpp +++ b/include/valik/argument_parsing/top_level.hpp @@ -5,6 +5,6 @@ namespace valik::app { -void init_top_level_parser(seqan3::argument_parser & parser); +void init_top_level_parser(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/validators.hpp b/include/valik/argument_parsing/validators.hpp index e3bfdb07..df593c46 100644 --- a/include/valik/argument_parsing/validators.hpp +++ b/include/valik/argument_parsing/validators.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace valik::app @@ -13,7 +13,7 @@ struct power_of_two_validator void operator() (option_value_type const & val) const { if (!std::has_single_bit(val)) - throw seqan3::validation_error{"The value must be a power of two."}; + throw sharg::validation_error{"The value must be a power of two."}; } static std::string get_help_page_message () @@ -39,7 +39,7 @@ class positive_integer_validator void operator()(option_value_type const & val) const { if (!is_zero_positive && !val) - throw seqan3::validation_error{"The value must be a positive integer."}; + throw sharg::validation_error{"The value must be a positive integer."}; } std::string get_help_page_message () const @@ -71,7 +71,7 @@ class size_validator void operator()(option_value_type const & cmp) const { if (!std::regex_match(cmp, expression)) - throw seqan3::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")}; + throw sharg::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")}; } template @@ -105,7 +105,7 @@ class bin_validator void operator() (option_value_type const & values) const { if (values.empty()) - throw seqan3::validation_error{"The list of input files cannot be empty."}; + throw sharg::validation_error{"The list of input files cannot be empty."}; for (auto && value : values) { @@ -113,7 +113,7 @@ class bin_validator { sequence_file_validator(value); } - catch (seqan3::validation_error const & exception) + catch (sharg::validation_error const & exception) { if (value.extension() == ".minimiser") minimiser_file_validator(value); @@ -142,7 +142,7 @@ class bin_validator for (auto && value : values) if (is_minimiser_input != (value.extension() == ".minimiser")) - throw seqan3::validation_error{"You cannot mix sequence and minimiser files as input."}; + throw sharg::validation_error{"You cannot mix sequence and minimiser files as input."}; } std::string get_help_page_message() const @@ -184,10 +184,10 @@ class bin_validator } return result; }()}; - seqan3::input_file_validator<> minimiser_file_validator{{"minimiser"}}; + sharg::input_file_validator minimiser_file_validator{{"minimiser"}}; public: - seqan3::input_file_validator<> sequence_file_validator{{combined_extensions}}; + sharg::input_file_validator sequence_file_validator{{combined_extensions}}; }; } // namespace valik::app diff --git a/include/valik/index.hpp b/include/valik/index.hpp index 1b55a7f3..58066e5c 100644 --- a/include/valik/index.hpp +++ b/include/valik/index.hpp @@ -7,7 +7,7 @@ #pragma once -#include +#include #include #include <../lib/raptor/include/raptor/hierarchical_interleaved_bloom_filter.hpp> @@ -155,19 +155,19 @@ class valik_index if ((data_layout_mode == seqan3::data_layout::compressed && !compressed_) || (data_layout_mode == seqan3::data_layout::uncompressed && compressed_)) { - throw seqan3::argument_parser_error{"Data layouts of serialised and specified index differ."}; + throw sharg::validation_error{"Data layouts of serialised and specified index differ."}; } archive(bin_path_); archive(ibf_); } catch (std::exception const & e) { - throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}}; + throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}}; } } else { - throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE + throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE } } @@ -195,13 +195,13 @@ class valik_index // GCOVR_EXCL_START catch (std::exception const & e) { - throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}}; + throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}}; } // GCOVR_EXCL_STOP } else { - throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE + throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE } } //!\endcond diff --git a/include/valik/search/env_var_pack.hpp b/include/valik/search/env_var_pack.hpp new file mode 100644 index 00000000..1b3f5752 --- /dev/null +++ b/include/valik/search/env_var_pack.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace valik +{ + +struct env_var_pack +{ + std::filesystem::path tmp_path; + std::string stellar_exec{"stellar"}; + std::string merge_exec{"cat"}; + + env_var_pack() + { + // the location of bin-query fasta files can be overwritten with an environment variable + if (auto ptr = std::getenv("VALIK_TMP"); ptr != nullptr) + { + tmp_path = std::string(ptr); + std::filesystem::file_status s = status(tmp_path); + std::filesystem::perms p = s.permissions(); + bool is_writable = (std::filesystem::perms::none != (p & std::filesystem::perms::owner_write)) | + (std::filesystem::perms::none != (p & std::filesystem::perms::group_write)) | + (std::filesystem::perms::none != (p & std::filesystem::perms::others_write)); + + if (!exists(tmp_path) | !is_directory(s) | !is_writable ) + throw std::runtime_error("Directory $VALIK_TMP=" + std::string(ptr) + " must exist and write permission must be granted"); + } + + else + tmp_path = create_temporary_path("valik/stellar_call_XXXXXX"); + + if (auto ptr = std::getenv("VALIK_STELLAR"); ptr != nullptr) + stellar_exec = std::string(ptr); + + if (auto ptr = std::getenv("VALIK_MERGE"); ptr != nullptr) + merge_exec = std::string(ptr); + } + + /* Creates a temporary folder in the temporary path of the OS + * + * \param name: a name with 'XXXXXX' at the end, e.g.: valik/call_XXXXXX + * \return returns the name with the 'XXXXXX' replaced and the directory created + * + * throws if any errors occurs + */ + static std::filesystem::path create_temporary_path(std::filesystem::path name) + { + if (!name.is_relative()) + { + throw std::runtime_error("Must be given a relative file"); + } + auto path = std::filesystem::temp_directory_path() / name; + auto path_str = path.native(); + create_directories(path.parent_path()); + auto str = std::vector(path_str.size()+1, '\0'); // Must include an extra character to include a 0 + std::copy_n(path_str.data(), path_str.size(), str.data()); + auto ptr = mkdtemp(str.data()); + if (!ptr) + { + throw std::runtime_error("Could not create temporary folder: " + path_str); + } + return str.data(); + } +}; + +} diff --git a/include/valik/search/search_time_statistics.hpp b/include/valik/search/search_time_statistics.hpp index 7a7f13a8..98f7edf8 100644 --- a/include/valik/search/search_time_statistics.hpp +++ b/include/valik/search/search_time_statistics.hpp @@ -35,10 +35,9 @@ struct search_time_statistics } }; -inline void write_time_statistics(search_time_statistics const & time_statistics, search_arguments const & arguments) +inline void write_time_statistics(search_time_statistics const & time_statistics, std::string const & time_file) { - std::filesystem::path file_path{arguments.out_file}; - file_path += ".time"; + std::filesystem::path file_path{time_file}; std::ofstream file_handle(file_path, std::ofstream::app); file_handle << "IBF I/O\tReads I/O\tPrefilter\tMin cart time\tAvg cart time\tMax cart time\tNr carts\n"; @@ -46,11 +45,15 @@ inline void write_time_statistics(search_time_statistics const & time_statistics << std::setprecision(2) << time_statistics.index_io_time << '\t' << time_statistics.reads_io_time << '\t' - << time_statistics.prefilter_time << '\t' - << time_statistics.get_cart_min() << '\t' - << time_statistics.get_cart_avg() << '\t' - << time_statistics.get_cart_max() << '\t' - << time_statistics.cart_processing_times.size() << '\n'; + << time_statistics.prefilter_time << '\t'; + if (!time_statistics.cart_processing_times.empty()) + { + file_handle << time_statistics.get_cart_min() << '\t' + << time_statistics.get_cart_avg() << '\t' + << time_statistics.get_cart_max() << '\t' + << time_statistics.cart_processing_times.size() << '\n'; + + } } diff --git a/include/valik/shared.hpp b/include/valik/shared.hpp index 8656feac..1976a0f8 100644 --- a/include/valik/shared.hpp +++ b/include/valik/shared.hpp @@ -8,6 +8,8 @@ #include +#include + namespace valik { @@ -65,8 +67,39 @@ struct build_arguments std::filesystem::path ref_meta_path{}; }; -struct search_arguments +struct minimiser_threshold_arguments +{ + virtual ~minimiser_threshold_arguments() = 0; // make an abstract base struct + + double tau{0.9999}; + double threshold{std::numeric_limits::quiet_NaN()}; + double p_max{0.15}; + double fpr{0.05}; + uint8_t errors{0}; + size_t pattern_size{}; + bool treshold_was_set{false}; + bool cache_thresholds{false}; + + protected: + // prevent creating, assigning or moving base struct instances + minimiser_threshold_arguments() = default; + minimiser_threshold_arguments(minimiser_threshold_arguments const&) = default; + minimiser_threshold_arguments(minimiser_threshold_arguments&&) = default; + minimiser_threshold_arguments& operator=(minimiser_threshold_arguments const&) = default; + minimiser_threshold_arguments& operator=(minimiser_threshold_arguments&&) = default; +}; + +inline minimiser_threshold_arguments::~minimiser_threshold_arguments() = default; + +struct search_arguments final : public minimiser_threshold_arguments { + ~search_arguments() override = default; + search_arguments() = default; + search_arguments(search_arguments const&) = default; + search_arguments(search_arguments&&) = default; + search_arguments& operator=(search_arguments const&) = default; + search_arguments& operator=(search_arguments&&) = default; + uint32_t window_size{23u}; seqan3::shape shape{seqan3::ungapped{20u}}; uint8_t shape_size{shape.size()}; @@ -80,16 +113,6 @@ struct search_arguments std::filesystem::path index_file{}; std::filesystem::path out_file{"search.gff"}; - // Related to thresholding - double tau{0.9999}; - double threshold{std::numeric_limits::quiet_NaN()}; - double p_max{0.15}; - double fpr{0.05}; - uint8_t errors{0}; - uint64_t pattern_size{}; - bool treshold_was_set{false}; - bool cache_thresholds{false}; - bool compressed{false}; bool write_time{false}; @@ -115,6 +138,8 @@ struct search_arguments std::filesystem::path seg_path{}; std::filesystem::path ref_meta_path{}; + bool shared_memory{false}; + }; } // namespace valik diff --git a/include/valik/valik.hpp b/include/valik/valik.hpp index 78e2b569..2090755b 100644 --- a/include/valik/valik.hpp +++ b/include/valik/valik.hpp @@ -1,17 +1,17 @@ #pragma once -#include +#include #include namespace valik::app { -void try_parsing(seqan3::argument_parser & parser); -void init_top_level_parser(seqan3::argument_parser & parser); -void run_split(seqan3::argument_parser & parser); -void run_build(seqan3::argument_parser & parser); -void run_search(seqan3::argument_parser & parser); -void run_consolidation(seqan3::argument_parser & parser); +void try_parsing(sharg::parser & parser); +void init_top_level_parser(sharg::parser & parser); +void run_split(sharg::parser & parser); +void run_build(sharg::parser & parser); +void run_search(sharg::parser & parser); +void run_consolidation(sharg::parser & parser); } // namespace valik::app diff --git a/lib/raptor_data_simulation b/lib/raptor_data_simulation index 1c0b309b..768f2e84 160000 --- a/lib/raptor_data_simulation +++ b/lib/raptor_data_simulation @@ -1 +1 @@ -Subproject commit 1c0b309bc485bc995fe71e07af070ce39126fc6e +Subproject commit 768f2e84f41d6fb348544d68006b661a6abc2120 diff --git a/lib/seqan b/lib/seqan new file mode 160000 index 00000000..8ce355dd --- /dev/null +++ b/lib/seqan @@ -0,0 +1 @@ +Subproject commit 8ce355dd960bbf7a5fa0292b49f7342f7e456da6 diff --git a/lib/sharg b/lib/sharg new file mode 160000 index 00000000..a59e3682 --- /dev/null +++ b/lib/sharg @@ -0,0 +1 @@ +Subproject commit a59e3682e8976f44f693f9f736b3f7b2761b9248 diff --git a/lib/stellar3 b/lib/stellar3 new file mode 160000 index 00000000..35c7f5dc --- /dev/null +++ b/lib/stellar3 @@ -0,0 +1 @@ +Subproject commit 35c7f5dc821e9f1ed8970b64e9daf08d9568e4fe diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bd7f9c4c..941d4835 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,8 +6,11 @@ cmake_minimum_required (VERSION 3.16) # Shared interface_ add_library ("${PROJECT_NAME}_interface" INTERFACE) target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE seqan3::seqan3) +target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE sharg::sharg) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../include) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/raptor/include) +target_include_directories ("${PROJECT_NAME}_interface" SYSTEM INTERFACE ../lib/stellar3/include) +target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/seqan/include) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/robin-hood-hashing/src/include) target_compile_options ("${PROJECT_NAME}_interface" INTERFACE "-pedantic" "-Wall" "-Wextra") @@ -29,6 +32,7 @@ add_library ("raptor_threshold" STATIC ../lib/raptor/src/threshold/precompute_threshold.cpp ) target_link_libraries ("raptor_threshold" PUBLIC "${PROJECT_NAME}_interface") + add_library ("${PROJECT_NAME}_search_lib" STATIC valik_search.cpp) target_link_libraries ("${PROJECT_NAME}_search_lib" PUBLIC "raptor_threshold") diff --git a/src/argument_parsing/build.cpp b/src/argument_parsing/build.cpp index 2bf26eb3..de75f374 100644 --- a/src/argument_parsing/build.cpp +++ b/src/argument_parsing/build.cpp @@ -6,67 +6,66 @@ namespace valik::app { -void init_build_parser(seqan3::argument_parser & parser, build_arguments & arguments) +void init_build_parser(sharg::parser & parser, build_arguments & arguments) { init_shared_meta(parser); - init_shared_options(parser, arguments); parser.add_positional_option(arguments.bin_file, - "File containing one file per line per bin when building from clustered sequences. " - "Input sequence file when building from overlapping segments.", - seqan3::input_file_validator{}); + sharg::config{.description = "File containing one file per line per bin when building from clustered sequences. " + "Input sequence file when building from overlapping segments.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.window_size, - '\0', - "window", - "Choose the window size.", - seqan3::option_spec::standard, - positive_integer_validator{}); + sharg::config{.short_id = '\0', + .long_id = "window", + .description = "Choose the window size.", + .validator = positive_integer_validator{}}); parser.add_option(arguments.kmer_size, - '\0', - "kmer", - "Choose the kmer size.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 32}); + sharg::config{.short_id = '\0', + .long_id = "kmer", + .description = "Choose the kmer size.", + .validator = sharg::arithmetic_range_validator{1, 32}}); parser.add_option(arguments.out_path, - '\0', - "output", - "Provide an output filepath.", - seqan3::option_spec::required); + sharg::config{.short_id = '\0', + .long_id = "output", + .description = "Provide an output filepath.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {}}}); parser.add_option(arguments.size, - '\0', - "size", - "Choose the size of the resulting IBF.", - seqan3::option_spec::required, - size_validator{"\\d+\\s{0,1}[k,m,g,t,K,M,G,T]"}); + sharg::config{.short_id = '\0', + .long_id = "size", + .description = "Choose the size of the resulting IBF.", + .required = true, + .validator = size_validator{"\\d+\\s{0,1}[k,m,g,t,K,M,G,T]"}}); parser.add_option(arguments.hash, - '\0', - "hash", - "Choose the number of hashes.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 5}); + sharg::config{.short_id = '\0', + .long_id = "hash", + .description = "Choose the number of hashes.", + .validator = sharg::arithmetic_range_validator{1, 5}}); parser.add_flag(arguments.compressed, - '\0', - "compressed", - "Build a compressed IBF."); + sharg::config{.short_id = '\0', + .long_id = "compressed", + .description = "Build a compressed IBF."}); parser.add_flag(arguments.from_segments, - '\0', - "from-segments", - "Creates IBF from split reference database instead of reference clusters.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "from-segments", + .description = "Creates IBF from split reference database instead of reference clusters."}); parser.add_option(arguments.seg_path, - '\0', - "seg-meta", - "Path to segment metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Path to segment metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); + parser.add_option(arguments.threads, + sharg::config{.short_id = '\0', + .long_id = "threads", + .description = "Choose the number of threads.", + .validator = positive_integer_validator{}}); } -void run_build(seqan3::argument_parser & parser) +void run_build(sharg::parser & parser) { build_arguments arguments{}; init_build_parser(parser, arguments); @@ -114,16 +113,16 @@ void run_build(seqan3::argument_parser & parser) if (parser.is_option_set("window")) { if (arguments.kmer_size > arguments.window_size) - throw seqan3::argument_parser_error{"The k-mer size cannot be bigger than the window size."}; + throw sharg::parser_error{"The k-mer size cannot be bigger than the window size."}; } else arguments.window_size = arguments.shape.size(); try { - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}(arguments.out_path); + sharg::output_file_validator{sharg::output_file_open_options::open_or_create}(arguments.out_path); } - catch (seqan3::argument_parser_error const & ext) + catch (sharg::parser_error const & ext) { std::cerr << "[Error] " << ext.what() << '\n'; std::exit(-1); @@ -151,7 +150,7 @@ void run_build(seqan3::argument_parser & parser) multiplier = 8ull * 1024ull; break; default: - throw seqan3::argument_parser_error{"Use {k, m, g, t} to pass size. E.g., --size 8g."}; + throw sharg::parser_error{"Use {k, m, g, t} to pass size. E.g., --size 8g."}; } size_t size{}; diff --git a/src/argument_parsing/consolidate.cpp b/src/argument_parsing/consolidate.cpp index 55b74e16..b7d9e72a 100644 --- a/src/argument_parsing/consolidate.cpp +++ b/src/argument_parsing/consolidate.cpp @@ -4,30 +4,29 @@ namespace valik::app { -void init_consolidation_parser(seqan3::argument_parser & parser, consolidation_arguments & arguments) +void init_consolidation_parser(sharg::parser & parser, consolidation_arguments & arguments) { init_shared_meta(parser); parser.add_option(arguments.matches_in, - 'i', - "input", - "DREAM Stellar matches.", - seqan3::option_spec::required, - seqan3::input_file_validator{{"gff"}}); + sharg::config{.short_id = 'i', + .long_id = "input", + .description = "DREAM Stellar matches.", + .required = true, + .validator = sharg::input_file_validator{{"gff"}}}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.matches_out, - 'o', - "output", - "Consolidated output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create, {"gff"}}); + sharg::config{.short_id = 'o', + .long_id = "output", + .description = "Consolidated output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"gff"}}}); } -void run_consolidation(seqan3::argument_parser & parser) +void run_consolidation(sharg::parser & parser) { consolidation_arguments arguments{}; init_consolidation_parser(parser, arguments); diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp index 0ca13a9d..5947a83f 100644 --- a/src/argument_parsing/search.cpp +++ b/src/argument_parsing/search.cpp @@ -8,102 +8,159 @@ namespace valik::app { -void init_search_parser(seqan3::argument_parser & parser, search_arguments & arguments) +void init_search_parser(sharg::parser & parser, search_arguments & arguments) { init_shared_meta(parser); - init_shared_options(parser, arguments); parser.add_option(arguments.index_file, - '\0', - "index", - "Provide a valid path to an IBF.", - seqan3::option_spec::required, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "index", + .description = "Provide a valid path to an IBF.", + .required = true, + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.query_file, - '\0', - "query", - "Provide a path to the query file.", - seqan3::option_spec::required, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "query", + .description = "Provide a path to the query file.", + .required = true, + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.out_file, - '\0', - "output", - "Please provide a valid path to the output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create, {"gff"}}); + sharg::config{.short_id = '\0', + .long_id = "output", + .description = "Please provide a valid path to the output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"gff"}}}); parser.add_option(arguments.errors, - '\0', - "error", - "Choose the number of errors.", - seqan3::option_spec::standard, - positive_integer_validator{true}); + sharg::config{.short_id = '\0', + .long_id = "error", + .description = "Choose the number of errors.", + .validator = positive_integer_validator{true}}); parser.add_option(arguments.tau, - '\0', - "tau", - "Used in the dynamic thresholding. The higher tau, the lower the threshold.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "tau", + .description = "Used in the dynamic thresholding. The higher tau, the lower the threshold.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.threshold, - '\0', - "threshold", - "If set, this threshold is used instead of the probabilistic models.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "threshold", + .description = "If set, this threshold is used instead of the probabilistic models.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.p_max, - '\0', - "p_max", - "Used in the dynamic thresholding. The higher p_max, the lower the threshold.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "p_max", + .description = "Used in the dynamic thresholding. The higher p_max, the lower the threshold.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.pattern_size, - '\0', - "pattern", - "Choose the pattern size. Default: half of first query sequence.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "pattern", + .description = "Choose the pattern size. Default: half of first query sequence."}); parser.add_option(arguments.overlap, - '\0', - "overlap", - "Choose how much sequential patterns overlap. Default: pattern size - 1.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "overlap", + .description = "Choose how much sequential patterns overlap. Default: pattern size - 1."}); parser.add_flag(arguments.compressed, - '\0', - "compressed", - "Build a compressed IBF."); + sharg::config{.short_id = '\0', + .long_id = "compressed", + .description = "Build a compressed IBF."}); parser.add_flag(arguments.cache_thresholds, - '\0', - "cache-thresholds", - "Stores the computed thresholds with an unique name next to the index. In the next search call " - "using this option, the stored thresholds are re-used.\n" - "Two files are stored:\n" - "\\fBthreshold_*.bin\\fP: Depends on pattern, window, kmer/shape, errors, and tau.\n" - "\\fBcorrection_*.bin\\fP: Depends on pattern, window, kmer/shape, p_max, and fpr."); + sharg::config{.short_id = '\0', + .long_id = "cache-thresholds", + .description = "Stores the computed thresholds with an unique name next to the index. In the next search call " + "using this option, the stored thresholds are re-used.\n" + "Two files are stored:\n" + "\\fBthreshold_*.bin\\fP: Depends on pattern, window, kmer/shape, errors, and tau.\n" + "\\fBcorrection_*.bin\\fP: Depends on pattern, window, kmer/shape, p_max, and fpr."}); parser.add_flag(arguments.write_time, - '\0', - "time", - "Write timing file.", - seqan3::option_spec::advanced); + sharg::config{.short_id = '\0', + .long_id = "time", + .description = "Write timing file.", + .advanced = true}); parser.add_option(arguments.cart_max_capacity, - '\0', - "cart_max_capacity", - "Number of elements to be stored in a single cart before it is send for processing."); + sharg::config{.short_id = '\0', + .long_id = "cart_max_capacity", + .description = "Number of elements to be stored in a single cart before it is send for processing."}); parser.add_option(arguments.max_queued_carts, - '\0', - "max_queued_carts", - "Maximal number of carts that are full and are waiting to be processed."); + sharg::config{.short_id = '\0', + .long_id = "max_queued_carts", + .description = "Maximal number of carts that are full and are waiting to be processed."}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.seg_path, - '\0', - "seg-meta", - "Path to segment metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Path to segment metadata file created by split.", + .validator = sharg::input_file_validator{}}); + parser.add_flag(arguments.shared_memory, + sharg::config{.short_id = '\0', + .long_id = "shared-memory", + .description = "Launch Stellar instances on a single machine with shared memory."}); + parser.add_option(arguments.threads, + sharg::config{.short_id = '\0', + .long_id = "threads", + .description = "Choose the number of threads.", + .validator = positive_integer_validator{}}); + + ///////////////////////////////////////// + // Stellar options + ///////////////////////////////////////// + /* + // Filtering options + parser.add_option(options.qGram, + sharg::config{.short_id = 'k', + .long_id = "kmer", + .description = "Length of the q-grams.", + .validator = sharg::arithmetic_range_validator{1, 32}}); + parser.add_option(options.maxRepeatPeriod, + sharg::config{.short_id = '\0', + .long_id = "repeatPeriod", + .description = "Maximal period of low complexity repeats to be filtered.", + .validator = sharg::arithmetic_range_validator{1, 32}}); + parser.add_option(options.minRepeatLength, + sharg::config{.short_id = '\0', + .long_id = "repeatLength", + .description = "Minimal length of low complexity repeats to be filtered.", + .validator = sharg::arithmetic_range_validator{1u, std::numeric_limits::max()}}); + parser.add_option(options.qgramAbundanceCut, + sharg::config{.short_id = 'c', + .long_id = "abundanceCut", + .description = "k-mer overabundance cut ratio.", + .validator = float_in_range_validator{0, 1}}); + + // Verification options + parser.add_option(options.xDrop, + sharg::config{.short_id = 'x', + .long_id = "xDrop", + .description = "Maximal x-drop for extension."}); + parser.add_option(options.strVerificationMethod, + sharg::config{.short_id = '\0', + .long_id = "verification", + .description = "Verification strategy: exact or bestLocal or bandedGlobal.", + .validator = sharg::value_list_validator{"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}}); + parser.add_option(options.disableThresh, + sharg::config{.short_id = '\0', + .long_id = "disableThresh", + .description = "Maximal number of verified matches before disabling verification for one query sequence (default infinity).", + .validator = sharg::arithmetic_range_validator{1, 10000}}); + parser.add_option(options.numMatches, + sharg::config{.short_id = 'n', + .long_id = "numMatches", + .description = "Maximal number of kept matches per query and database. If STELLAR finds more matches, only the longest ones are kept."}); + parser.add_option(options.compactThresh, + sharg::config{.short_id = 's', + .long_id = "sortThresh", + .description = "Number of matches triggering removal of duplicates. Choose a smaller value for saving space."}); + + parser.add_option(options.disabledQueriesFile, + sharg::config{.short_id = '\0', + .long_id = "disabledQueriesFile", + .description = "Name of output file for disabled query sequences.", + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"fa", "fasta"}}}); + */ } -void run_search(seqan3::argument_parser & parser) +void run_search(sharg::parser & parser) { search_arguments arguments{}; @@ -115,7 +172,7 @@ void run_search(seqan3::argument_parser & parser) // Various checks. // ========================================== - seqan3::input_file_validator>{}(arguments.query_file); + sharg::input_file_validator{}(arguments.query_file); arguments.treshold_was_set = parser.is_option_set("threshold"); // ========================================== @@ -141,7 +198,7 @@ void run_search(seqan3::argument_parser & parser) if (parser.is_option_set("pattern")) { if (arguments.pattern_size < arguments.window_size) - throw seqan3::argument_parser_error{"The minimiser window cannot be bigger than the pattern."}; + throw sharg::validation_error{"The minimiser window cannot be bigger than the pattern."}; } else @@ -164,7 +221,7 @@ void run_search(seqan3::argument_parser & parser) if (parser.is_option_set("overlap")) { if (arguments.overlap >= arguments.pattern_size) - throw seqan3::argument_parser_error{"The overlap size has to be smaller than the pattern size."}; + throw sharg::validation_error{"The overlap size has to be smaller than the pattern size."}; } else arguments.overlap = arguments.pattern_size - 1; diff --git a/src/argument_parsing/shared.cpp b/src/argument_parsing/shared.cpp index e8230365..fc12b48b 100644 --- a/src/argument_parsing/shared.cpp +++ b/src/argument_parsing/shared.cpp @@ -3,7 +3,7 @@ namespace valik::app { -void init_shared_meta(seqan3::argument_parser & parser) +void init_shared_meta(sharg::parser & parser) { parser.info.app_name = "valik"; parser.info.author = "Evelin Aasna"; @@ -43,13 +43,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.)"; parser.info.version = "1.0"; } -void try_parsing(seqan3::argument_parser & parser) +void try_parsing(sharg::parser & parser) { try { parser.parse(); } - catch (seqan3::argument_parser_error const & ext) + catch (sharg::parser_error const & ext) { std::cerr << "[Error] " << ext.what() << '\n'; std::exit(-1); diff --git a/src/argument_parsing/split.cpp b/src/argument_parsing/split.cpp index 5e563074..11beb69e 100644 --- a/src/argument_parsing/split.cpp +++ b/src/argument_parsing/split.cpp @@ -6,43 +6,41 @@ namespace valik::app { -void init_split_parser(seqan3::argument_parser & parser, split_arguments & arguments) +void init_split_parser(sharg::parser & parser, split_arguments & arguments) { init_shared_meta(parser); parser.add_positional_option(arguments.ref_file, - "File containing reference sequences.", - seqan3::input_file_validator{}); + sharg::config{.description = "File containing reference sequences.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.ref_out, - '\0', - "ref-meta", - "Please provide a valid path to the reference metadata output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Please provide a valid path to the reference metadata output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create}}); parser.add_option(arguments.seg_out, - '\0', - "seg-meta", - "Please provide a valid path to the segment metadata output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Please provide a valid path to the segment metadata output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create}}); parser.add_option(arguments.overlap, - '\0', - "overlap", - "Choose how much consecutive segments overlap.", - seqan3::option_spec::standard, - positive_integer_validator{true}); + sharg::config{.short_id = '\0', + .long_id = "overlap", + .description = "Choose how much consecutive segments overlap.", + .validator = positive_integer_validator{true}}); parser.add_option(arguments.bins, - '\0', - "bins", - "Number of bins in the IBF. Multiples of 64 lead to better performance.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 29952}); + sharg::config{.short_id = '\0', + .long_id = "bins", + .description = "Number of bins in the IBF. Multiples of 64 lead to better performance.", + .validator = sharg::arithmetic_range_validator{1, 29952}}); parser.add_flag(arguments.write_seg, - '\0', - "write-out", - "Write segment sequences to disk."); + sharg::config{.short_id = '\0', + .long_id = "write-out", + .description = "Write segment sequences to disk."}); } -void run_split(seqan3::argument_parser & parser) +void run_split(sharg::parser & parser) { split_arguments arguments{}; init_split_parser(parser, arguments); diff --git a/src/argument_parsing/top_level.cpp b/src/argument_parsing/top_level.cpp index 7028ee67..8ec64d69 100644 --- a/src/argument_parsing/top_level.cpp +++ b/src/argument_parsing/top_level.cpp @@ -3,7 +3,7 @@ namespace valik::app { -void init_top_level_parser(seqan3::argument_parser & parser) +void init_top_level_parser(sharg::parser & parser) { init_shared_meta(parser); parser.info.description.emplace_back("Binning Directories are a datastruture that can be used in various ways. " diff --git a/src/valik_main.cpp b/src/valik_main.cpp index 1d5e047c..97b68d5f 100644 --- a/src/valik_main.cpp +++ b/src/valik_main.cpp @@ -13,12 +13,12 @@ int main(int argc, char ** argv) { try { - seqan3::argument_parser top_level_parser{"valik", argc, argv, seqan3::update_notifications::on, {"split", "build", "search", "consolidate"}}; + sharg::parser top_level_parser{"valik", argc, argv, sharg::update_notifications::off, {"split", "build", "search", "consolidate"}}; valik::app::init_top_level_parser(top_level_parser); valik::app::try_parsing(top_level_parser); - seqan3::argument_parser & sub_parser = top_level_parser.get_sub_parser(); + sharg::parser & sub_parser = top_level_parser.get_sub_parser(); if (sub_parser.info.app_name == std::string_view{"valik-split"}) valik::app::run_split(sub_parser); if (sub_parser.info.app_name == std::string_view{"valik-build"}) diff --git a/src/valik_search.cpp b/src/valik_search.cpp index f824a9b5..8261f8e4 100644 --- a/src/valik_search.cpp +++ b/src/valik_search.cpp @@ -1,7 +1,6 @@ #include -#include -#include +#include #include #include #include @@ -14,35 +13,17 @@ #include +#include +#include +#include +#include +#include +#include +#include namespace valik::app { -/* Creates a temporary folder in the temporary path of the OS - * - * \param name: a name with 'XXXXXX' at the end, e.g.: valik/call_XXXXXX - * \return returns the name with the 'XXXXXX' replaced and the directory created - * - * throws if any errors occures - */ -static std::filesystem::path create_temporary_path(std::filesystem::path name) { - if (!name.is_relative()) { - throw std::runtime_error("Must be given a relative file"); - } - auto path = std::filesystem::temp_directory_path() / name; - auto path_str = path.native(); - create_directories(path.parent_path()); - auto str = std::vector(path_str.size()+1, '\0'); // Must include an extra character to include a 0 - std::copy_n(path_str.data(), path_str.size(), str.data()); - auto ptr = mkdtemp(str.data()); - if (!ptr) { - throw std::runtime_error("Could not create temporary folder: " + path_str); - } - return str.data(); -} - - - //----------------------------- // // Setup IBF and launch multithreaded search. @@ -72,23 +53,7 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; - // the location of bin-query fasta files can be overwritten with an environment variable - // the $VALIK_TMP directory has to exist and write permission must be granted - std::filesystem::path tmp_path; - if (auto ptr = std::getenv("VALIK_TMP"); ptr != nullptr) - tmp_path = std::string(ptr); - else - tmp_path = create_temporary_path("valik/stellar_call_XXXXXX"); - - std::string stellar_exec = "stellar"; - if (auto ptr = std::getenv("VALIK_STELLAR"); ptr != nullptr) - stellar_exec = std::string(ptr); - - std::string merge_exec = "cat"; - if (auto ptr = std::getenv("VALIK_MERGE"); ptr != nullptr) - merge_exec = std::string(ptr); - - + env_var_pack var_pack{}; sync_out synced_out{arguments.out_file}; auto queue = cart_queue{index.ibf().bin_count(), arguments.cart_max_capacity, arguments.max_queued_carts}; @@ -110,30 +75,73 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim std::vector output_files; std::stringstream text_out; std::vector timeStatistics; + std::vector stellarTimes; }; std::vector localData(arguments.threads); + using TAlphabet = seqan2::Dna; + using TSequence = seqan2::String; + + // negative (reverse complemented) database strand + bool const reverse = true /*threadOptions.reverse && threadOptions.alphabet != "protein" && threadOptions.alphabet != "char" */; + seqan2::StringSet databases; + seqan2::StringSet reverseDatabases; + seqan2::StringSet databaseIDs; + using TSize = decltype(length(databases[0])); + TSize refLen; + + if (arguments.shared_memory) + { + stellar::stellar_app_runtime stellarTime{}; + + for (auto bin_paths : index.bin_path()) + { + for (auto path : bin_paths) + { + bool const databasesSuccess = stellarTime.input_databases_time.measure_time([&]() + { + std::cout << "Launching stellar search on a shared memory machine...\n"; + return stellar::_importAllSequences(path.c_str(), "database", databases, databaseIDs, refLen, std::cout, std::cerr); + }); + if (!databasesSuccess) + return false; + } + } + + if (reverse) + { + for (auto database : databases) + { + reverseComplement(database); + seqan2::appendValue(reverseDatabases, database, seqan2::Generous()); + } + } + } + stellar::DatabaseIDMap databaseIDMap{databases, databaseIDs}; + stellar::DatabaseIDMap reverseDatabaseIDMap{reverseDatabases, databaseIDs}; + + auto consumerThreads = std::vector{}; for (size_t threadNbr = 0; threadNbr < arguments.threads; ++threadNbr) { consumerThreads.emplace_back( [&, threadNbr]() { auto& ld = localData[threadNbr]; - // this will block until producer threads have added carts to queue for (auto next = queue.dequeue(); next; next = queue.dequeue()) { auto & [bin_id, records] = *next; std::unique_lock g(mutex); - std::filesystem::path path = tmp_path / std::string("query_" + std::to_string(bin_id) + "_" + std::to_string(bin_count[bin_id]++) + ".fasta"); + std::filesystem::path cart_queries_path = var_pack.tmp_path / std::string("query_" + std::to_string(bin_id) + + "_" + std::to_string(bin_count[bin_id]++) + ".fasta"); g.unlock(); - ld.output_files.push_back(path.string() + ".gff"); + ld.output_files.push_back(cart_queries_path.string() + ".gff"); { - seqan3::sequence_file_output fout{path, fields{}}; + seqan3::sequence_file_output fout{cart_queries_path, fields{}}; for (auto & record : records) { @@ -142,57 +150,255 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim } } - std::vector process_args{}; - if (arguments.write_time) + if (arguments.shared_memory) { - std::filesystem::path time_path = path.string() + std::string(".gff.time"); - process_args.insert(process_args.end(), {"/usr/bin/time", "-o", std::string(time_path), "-f", "\"%e\t%M\t%x\t%C\""}); - } - process_args.insert(process_args.end(), {stellar_exec, "--version-check", "0"}); + stellar::StellarOptions threadOptions{}; + stellar::stellar_app_runtime stellarThreadTime{}; + using TDatabaseSegment = stellar::StellarDatabaseSegment; + + // import query sequences + seqan2::StringSet queries; + seqan2::StringSet queryIDs; + + using TSize = decltype(length(queries[0])); + TSize queryLen{0}; // does not get populated currently + //!TODO: split query sequence + bool const queriesSuccess = stellarThreadTime.input_queries_time.measure_time([&]() + { + return stellar::_importAllSequences(cart_queries_path.c_str(), "query", queries, queryIDs, queryLen, ld.text_out, ld.text_out); + }); + if (!queriesSuccess) + { + std::cerr << "Error importing queries\n"; + error_triggered = true; + } - if (segments && ref_meta) - { - // search segments of a single reference file - auto ref_len = ref_meta->total_len; - auto seg = segments->segment_from_bin(bin_id); - process_args.insert(process_args.end(), {index.bin_path()[0][0], std::string(path), - "--referenceLength", std::to_string(ref_len), - "--sequenceOfInterest", std::to_string(seg.ref_ind), - "--segmentBegin", std::to_string(seg.start), - "--segmentEnd", std::to_string(seg.start + seg.len)}); + threadOptions.alphabet = "dna"; // Possible values: dna, rna, protein, char + threadOptions.queryFile = cart_queries_path.string(); + threadOptions.prefilteredSearch = true; + threadOptions.referenceLength = refLen; + if (segments && ref_meta) + { + threadOptions.searchSegment = true; + auto seg = segments->segment_from_bin(bin_id); + threadOptions.binSequences.emplace_back(seg.ref_ind); + threadOptions.segmentBegin = seg.start; + threadOptions.segmentEnd = seg.start + seg.len; + } + else + { + if (index.bin_path().size() < (size_t) bin_id) { + throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + } + threadOptions.binSequences.push_back(bin_id); //!TODO: what if mutliple sequence files per bin + } + threadOptions.numEpsilon = er_rate; + threadOptions.epsilon = stellar::utils::fraction::from_double(threadOptions.numEpsilon).limit_denominator(); + threadOptions.minLength = arguments.pattern_size; + threadOptions.outputFile = cart_queries_path.string() + ".gff"; + stellar::_writeFileNames(threadOptions, ld.text_out); + stellar::_writeSpecifiedParams(threadOptions, ld.text_out); + stellar::_writeCalculatedParams(threadOptions, ld.text_out); // calculate qGram + ld.text_out << std::endl; + stellar::_writeMoreCalculatedParams(threadOptions, threadOptions.referenceLength, queries, ld.text_out); + + + auto current_time = stellarThreadTime.swift_index_construction_time.now(); + stellar::StellarIndex stellarIndex{queries, threadOptions}; + stellar::StellarSwiftPattern swiftPattern = stellarIndex.createSwiftPattern(); + + // Construct index of the queries + ld.text_out << "Constructing index..." << '\n'; + stellarIndex.construct(); + ld.text_out << std::endl; + stellarThreadTime.swift_index_construction_time.manual_timing(current_time); + + //!TODO: process disabled queries + std::vector disabledQueryIDs{}; + + stellar::StellarOutputStatistics outputStatistics{}; + if (threadOptions.forward) + { + auto databaseSegment = stellar::_getDREAMDatabaseSegment + (databases[threadOptions.binSequences[0]], threadOptions); + stellarThreadTime.forward_strand_stellar_time.measure_time([&]() + { + size_t const databaseRecordID = databaseIDMap.recordID(databaseSegment); + seqan2::CharString const & databaseID = databaseIDMap.databaseID(databaseRecordID); + // container for eps-matches + seqan2::StringSet const, + seqan2::CharString> > > forwardMatches; + seqan2::resize(forwardMatches, length(queries)); + + constexpr bool databaseStrand = true; + stellar::QueryIDMap queryIDMap{queries}; + + stellar::StellarComputeStatistics statistics = stellar::StellarLauncher::search_and_verify + ( + databaseSegment, + databaseID, + queryIDMap, + databaseStrand, + threadOptions, + swiftPattern, + stellarThreadTime.forward_strand_stellar_time.prefiltered_stellar_time, + forwardMatches + ); + + ld.text_out << std::endl; // swift filter output is on same line + stellar::_printDatabaseIdAndStellarKernelStatistics(threadOptions.verbose, databaseStrand, databaseID, statistics, ld.text_out); + + stellarThreadTime.forward_strand_stellar_time.post_process_eps_matches_time.measure_time([&]() + { + // forwardMatches is an in-out parameter + // this is the match consolidation + stellar::_postproccessQueryMatches(databaseStrand, threadOptions.referenceLength, threadOptions, + forwardMatches, disabledQueryIDs); + }); // measure_time + + // open output files + std::ofstream outputFile(threadOptions.outputFile.c_str(), ::std::ios_base::out); + if (!outputFile.is_open()) + { + std::cerr << "Could not open output file." << std::endl; + error_triggered = true; + } + stellarThreadTime.forward_strand_stellar_time.output_eps_matches_time.measure_time([&]() + { + // output forwardMatches on positive database strand + stellar::_writeAllQueryMatchesToFile(forwardMatches, queryIDs, databaseStrand, "gff", outputFile); + }); // measure_time + + outputStatistics = stellar::_computeOutputStatistics(forwardMatches); + }); // measure_time + } + + + if (reverse) + { + TDatabaseSegment databaseSegment{}; + stellarThreadTime.reverse_complement_database_time.measure_time([&]() + { + databaseSegment = _getDREAMDatabaseSegment + (reverseDatabases[threadOptions.binSequences[0]], threadOptions, reverse); + }); // measure_time + + stellarThreadTime.reverse_strand_stellar_time.measure_time([&]() + { + size_t const databaseRecordID = reverseDatabaseIDMap.recordID(databaseSegment); + seqan2::CharString const & databaseID = reverseDatabaseIDMap.databaseID(databaseRecordID); + // container for eps-matches + seqan2::StringSet const, + seqan2::CharString> > > reverseMatches; + seqan2::resize(reverseMatches, length(queries)); + + constexpr bool databaseStrand = false; + stellar::QueryIDMap queryIDMap{queries}; + + stellar::StellarComputeStatistics statistics = stellar::StellarLauncher::search_and_verify + ( + databaseSegment, + databaseID, + queryIDMap, + databaseStrand, + threadOptions, + swiftPattern, + stellarThreadTime.reverse_strand_stellar_time.prefiltered_stellar_time, + reverseMatches + ); + + ld.text_out << std::endl; // swift filter output is on same line + stellar::_printDatabaseIdAndStellarKernelStatistics(threadOptions.verbose, databaseStrand, databaseID, statistics, ld.text_out); + + stellarThreadTime.reverse_strand_stellar_time.post_process_eps_matches_time.measure_time([&]() + { + // reverseMatches is an in-out parameter + // this is the match consolidation + stellar::_postproccessQueryMatches(databaseStrand, threadOptions.referenceLength, threadOptions, + reverseMatches, disabledQueryIDs); + }); // measure_time + + // open output files + std::ofstream outputFile(threadOptions.outputFile.c_str(), ::std::ios_base::app); + if (!outputFile.is_open()) + { + std::cerr << "Could not open output file." << std::endl; + error_triggered = true; + } + stellarThreadTime.reverse_strand_stellar_time.output_eps_matches_time.measure_time([&]() + { + // output reverseMatches on negative database strand + stellar::_writeAllQueryMatchesToFile(reverseMatches, queryIDs, databaseStrand, "gff", outputFile); + }); // measure_time + + outputStatistics.mergeIn(stellar::_computeOutputStatistics(reverseMatches)); + }); // measure_time + } + + stellar::_writeOutputStatistics(outputStatistics, threadOptions.verbose, false /* disabledQueriesFile.is_open() */, ld.text_out); + + ld.timeStatistics.emplace_back(stellarThreadTime.milliseconds()); + if (arguments.write_time) + { + std::filesystem::path time_path = cart_queries_path.string() + std::string(".gff.time"); + + stellar::_print_stellar_app_time(stellarThreadTime, ld.text_out); + } } else { - // search a reference database of bin sequence files - if (index.bin_path().size() < bin_id) { - throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + std::vector process_args{}; + process_args.insert(process_args.end(), {var_pack.stellar_exec, "--version-check", "0"}); + + if (segments && ref_meta) + { + // search segments of a single reference file + auto ref_len = ref_meta->total_len; + auto seg = segments->segment_from_bin(bin_id); + process_args.insert(process_args.end(), {index.bin_path()[0][0], std::string(cart_queries_path), + "--referenceLength", std::to_string(ref_len), + "--sequenceOfInterest", std::to_string(seg.ref_ind), + "--segmentBegin", std::to_string(seg.start), + "--segmentEnd", std::to_string(seg.start + seg.len)}); + } + else + { + // search a reference database of bin sequence files + if (index.bin_path().size() < (size_t) bin_id) { + throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + } + process_args.insert(process_args.end(), {index.bin_path()[bin_id][0], std::string(cart_queries_path)}); } - process_args.insert(process_args.end(), {index.bin_path()[bin_id][0], std::string(path)}); - } - process_args.insert(process_args.end(), {"-e", std::to_string(er_rate), - "-l", std::to_string(arguments.pattern_size), - "-o", std::string(path) + ".gff"}); + if (arguments.write_time) + process_args.insert(process_args.end(), "--time"); - auto start = std::chrono::high_resolution_clock::now(); - external_process process(process_args); - auto end = std::chrono::high_resolution_clock::now(); + process_args.insert(process_args.end(), {"-e", std::to_string(er_rate), + "-l", std::to_string(arguments.pattern_size), + "-o", std::string(cart_queries_path) + ".gff"}); - ld.timeStatistics.emplace_back(0.0 + std::chrono::duration_cast>(end - start).count()); + auto start = std::chrono::high_resolution_clock::now(); + external_process process(process_args); + auto end = std::chrono::high_resolution_clock::now(); - ld.text_out << process.cout(); - ld.text_out << process.cerr(); + ld.timeStatistics.emplace_back(0.0 + std::chrono::duration_cast>(end - start).count()); - if (process.status() != 0) { - std::unique_lock g(mutex); // make sure that our output is synchronized - std::cerr << "error running VALIK_STELLAR\n"; - std::cerr << "call:"; - for (auto args : process_args) { - std::cerr << " " << args; + ld.text_out << process.cout(); + ld.text_out << process.cerr(); + + if (process.status() != 0) { + std::unique_lock g(mutex); // make sure that our output is synchronized + std::cerr << "error running VALIK_STELLAR\n"; + std::cerr << "call:"; + for (auto args : process_args) { + std::cerr << " " << args; + } + std::cerr << '\n'; + std::cerr << process.cerr() << '\n'; + error_triggered = true; } - std::cerr << '\n'; - std::cerr << process.cerr() << '\n'; - error_triggered = true; } } }); @@ -228,9 +434,20 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim text_out << ld.text_out.str(); } - std::vector merge_process_args{merge_exec}; - for (auto & path : output_files) - merge_process_args.push_back(path); + std::vector merge_process_args; + if (output_files.size() > 0) + { + merge_process_args.push_back(var_pack.merge_exec); + for (auto & path : output_files) + merge_process_args.push_back(path); + } + else + { + //!WORKAROUND: merge hangs if no valik matches found + merge_process_args.push_back("echo"); + merge_process_args.push_back("-n"); + } + external_process merge(merge_process_args); auto check_external_process_success = [&](std::vector const & proc_args, external_process const & proc) @@ -249,18 +466,6 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim return error_triggered; }; - if (arguments.write_time) - { - std::vector merge_time_files{"cat"}; - for (auto & path : output_files) - merge_time_files.push_back(path + ".time"); - external_process merge_time(merge_time_files); - error_triggered = check_external_process_success(merge_time_files, merge_time); - - std::ofstream time_out(arguments.out_file.string() + std::string(".time")); - time_out << merge_time.cout(); - } - error_triggered = check_external_process_success(merge_process_args, merge); std::ofstream matches_out(arguments.out_file); @@ -279,7 +484,7 @@ void valik_search(search_arguments const & arguments) failed = run_program(arguments, time_statistics); if (arguments.write_time) - write_time_statistics(time_statistics, arguments); + write_time_statistics(time_statistics, arguments.out_file.string() + ".time"); if (failed) { throw std::runtime_error("valik_search failed. Run didn't complete correctly."); diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt index b75795b5..1159508f 100644 --- a/test/cli/CMakeLists.txt +++ b/test/cli/CMakeLists.txt @@ -77,3 +77,23 @@ target_use_datasources (valik_test FILES 8bins50overlap_dream_all.gff) target_use_datasources (valik_test FILES 8bins50overlap_reference_metadata.tsv) target_use_datasources (valik_test FILES multi_seq_ref.fasta) target_use_datasources (valik_test FILES query_e0.06.fasta) + +add_cli_test (dream_test.cpp) +target_use_datasources (dream_test FILES 16bins13window1error.gff) +target_use_datasources (dream_test FILES 16bins13window1error.gff.out) +target_use_datasources (dream_test FILES 16bins13window.ibf) +target_use_datasources (dream_test FILES 16bins15window1error.gff) +target_use_datasources (dream_test FILES 16bins15window1error.gff.out) +target_use_datasources (dream_test FILES 16bins15window.ibf) +target_use_datasources (dream_test FILES 4bins13window1error.gff) +target_use_datasources (dream_test FILES 4bins13window1error.gff.out) +target_use_datasources (dream_test FILES 4bins13window.ibf) +target_use_datasources (dream_test FILES 4bins15window1error.gff) +target_use_datasources (dream_test FILES 4bins15window1error.gff.out) +target_use_datasources (dream_test FILES 4bins15window.ibf) +target_use_datasources (dream_test FILES dummy_reads.fastq) +target_use_datasources (dream_test FILES query.fastq) +target_use_datasources (dream_test FILES ref.fasta) +target_use_datasources (dream_test FILES ref_meta.txt) +target_use_datasources (dream_test FILES seg_meta150overlap16bins.txt) +target_use_datasources (dream_test FILES seg_meta150overlap4bins.txt) diff --git a/test/cli/cli_test.hpp b/test/cli/cli_test.hpp index 22eb06b8..70a7f4f5 100644 --- a/test/cli/cli_test.hpp +++ b/test/cli/cli_test.hpp @@ -193,6 +193,20 @@ struct valik_base : public cli_test return cli_test::data(name); } + static std::filesystem::path search_result_path(size_t const number_of_bins, size_t const window_size, + size_t const number_of_errors) noexcept + { + std::string name{}; + name += std::to_string(number_of_bins); + name += "bins"; + name += std::to_string(window_size); + name += "window"; + name += std::to_string(number_of_errors); + name += "error"; + name += ".gff"; + return cli_test::data(name); + } + static std::string string_from_file(std::filesystem::path const & path, std::ios_base::openmode const mode = std::ios_base::in) { std::ifstream file_stream(path, mode); @@ -518,17 +532,25 @@ struct valik_base : public cli_test return cli_test::data(name); } - static void compare_consolidation_out(std::vector const & expected, - std::vector const & actual) + static void compare_gff_out(std::vector const & expected, + std::vector const & actual) { EXPECT_EQ(expected.size(), actual.size()); + size_t not_actually_found{0}; for (auto & match : expected) { auto it = std::find(actual.begin(), actual.end(), match); - EXPECT_TRUE(it != actual.end()); + if (it == actual.end()) + { + not_actually_found++; + seqan3::debug_stream << match.to_string(); + } + // EXPECT_EQ(match.percid, (*it).percid); // EXPECT_EQ(match.attributes, (*it).attributes); } + + EXPECT_EQ(not_actually_found, 0); } }; @@ -539,4 +561,5 @@ struct valik_search_clusters : public valik_base, public testing::WithParamInter size_t, size_t>> {}; struct valik_search_segments : public valik_base, public testing::WithParamInterface> {}; +struct dream_search : public valik_base, public testing::WithParamInterface> {}; struct valik_consolidate : public valik_base, public testing::WithParamInterface> {}; diff --git a/test/cli/dream_test.cpp b/test/cli/dream_test.cpp new file mode 100644 index 00000000..efd7a0a0 --- /dev/null +++ b/test/cli/dream_test.cpp @@ -0,0 +1,100 @@ +#include +#include +#include // range comparisons +#include // strings +#include // vectors + +#include "cli_test.hpp" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////// DREAM shared memory ////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST_P(dream_search, shared_mem) +{ + auto const [number_of_bins, window_size, number_of_errors] = GetParam(); + + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + std::filesystem::path ref_meta_path = data("ref_meta.txt"); + valik::reference_metadata reference(ref_meta_path, false); + std::filesystem::path seg_meta_path = data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt"); + std::filesystem::path index_path = ibf_path(number_of_bins, window_size); + + cli_test_result const build = execute_app("valik", "build", + data("ref.fasta"), + "--window", std::to_string(window_size), + "--kmer 13", + "--size 32k", + "--from-segments", + "--ref-meta", ref_meta_path, + "--seg-meta", seg_meta_path, + "--output ", index_path); + + cli_test_result const result = execute_app("valik", "search", + "--shared-memory", + "--output search.gff", + "--pattern 50", + "--overlap 49", + "--error ", std::to_string(number_of_errors), + "--index ", index_path, + "--query ", data("query.fastq"), + "--ref-meta", ref_meta_path, + "--seg-meta", seg_meta_path); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"}); + EXPECT_EQ(result.err, std::string{}); + + auto distributed = valik::read_stellar_output(search_result_path(number_of_bins, window_size, number_of_errors), reference, std::ios::binary); + auto local = valik::read_stellar_output("search.gff", reference); + + compare_gff_out(distributed, local); +} + +INSTANTIATE_TEST_SUITE_P(shared_memory_dream_suite, + dream_search, + testing::Combine(testing::Values(4, 16), testing::Values(13, 15), testing::Values(1)), + [] (testing::TestParamInfo const & info) + { + std::string name = std::to_string(std::get<0>(info.param)) + "_bins_" + + std::to_string(std::get<1>(info.param)) + "_window_" + + std::to_string(std::get<2>(info.param)) + "_error"; + return name; + }); + +TEST_F(dream_search, no_matches) +{ + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + size_t number_of_bins = 4; + size_t window_size = 15; + cli_test_result const build = execute_app("valik", "build", + data("ref.fasta"), + "--window ", std::to_string(window_size), + "--kmer 13", + "--size 32k", + "--from-segments", + "--ref-meta", data("ref_meta.txt"), + "--seg-meta", data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt"), + "--output ", ibf_path(number_of_bins, window_size)); + + cli_test_result const result = execute_app("valik", "search", + "--shared-memory", + "--output search.gff", + "--pattern 50", + "--overlap 49", + "--error 1", + "--index ", ibf_path(number_of_bins, window_size), + "--query ", data("dummy_reads.fastq"), + "--ref-meta", data("ref_meta.txt"), + "--seg-meta", data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"}); + EXPECT_EQ(result.err, std::string{}); + + auto actual = string_list_from_file("search.gff"); + + EXPECT_EQ(0, actual.size()); +} diff --git a/test/cli/valik_options_test.cpp b/test/cli/valik_options_test.cpp index 5250979e..0a094517 100644 --- a/test/cli/valik_options_test.cpp +++ b/test/cli/valik_options_test.cpp @@ -76,8 +76,8 @@ TEST_F(argparse, no_subparser) cli_test_result const result = execute_app("valik", "foo"); std::string const expected { - "[Error] You either forgot or misspelled the subcommand! Please specify which sub-program you want to use: one " - "of [split,build,search,consolidate]. Use -h/--help for more information.\n" + "[Error] You misspelled the subcommand! Please specify which sub-program you want to use: one of [split, build, search, consolidate]. " + "Use -h/--help for more information.\n" }; EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); @@ -89,8 +89,8 @@ TEST_F(argparse, unknown_option) cli_test_result const result = execute_app("valik", "-v"); std::string const expected { - "[Error] You either forgot or misspelled the subcommand! Please specify which sub-program you want to use: one " - "of [split,build,search,consolidate]. Use -h/--help for more information.\n" + "[Error] You misspelled the subcommand! Please specify which sub-program you want to use: one of [split, build, search, consolidate]. " + "Use -h/--help for more information.\n" }; EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); @@ -169,7 +169,7 @@ TEST_F(argparse_build, output_wrong) tmp_bin_list_file.file_path); EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); - EXPECT_EQ(result.err, std::string{"[Error] Cannot write \"foo/out.ibf\"!\n"}); + EXPECT_EQ(result.err, std::string{"[Error] Validation failed for option --output: Cannot write \"foo/out.ibf\"!\n"}); } TEST_F(argparse_build, size_missing) diff --git a/test/cli/valik_test.cpp b/test/cli/valik_test.cpp index aa2ee2b1..689f370e 100644 --- a/test/cli/valik_test.cpp +++ b/test/cli/valik_test.cpp @@ -103,6 +103,7 @@ TEST_P(valik_build_segments, build_from_segments) std::string ref_meta_path = cli_test::data("reference_metadata.txt"); std::string seg_meta_path = cli_test::data(std::to_string(overlap) + "overlap" + std::to_string(number_of_bins) + "bins.txt"); + //!TODO: the paths in the index are not data(path.fasta) so the file can't be opened by stellar (only a testing issue) cli_test_result const result = execute_app("valik", "build", "--kmer 13", "--window ", std::to_string(window_size), @@ -253,7 +254,7 @@ TEST_P(valik_consolidate, consolidation) auto expected = valik::read_stellar_output(stellar_gold_path(segment_overlap), reference, std::ios::binary); auto actual = valik::read_stellar_output("consolidated.gff", reference); - compare_consolidation_out(expected, actual); + compare_gff_out(expected, actual); } INSTANTIATE_TEST_SUITE_P(consolidation_suite, diff --git a/test/data/consolidate/16bins50overlap_dream_all.gff.out b/test/data/consolidate/16bins50overlap_dream_all.gff.out deleted file mode 100644 index 5ad4a575..00000000 --- a/test/data/consolidate/16bins50overlap_dream_all.gff.out +++ /dev/null @@ -1,672 +0,0 @@ -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_15_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_15_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_14_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_14_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_13_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_13_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_12_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_12_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 0 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_11_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_11_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_10_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_10_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_9_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_9_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_8_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_8_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_7_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_7_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_6_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_6_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_5_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_5_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_4_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_4_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_3_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_3_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_2_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_2_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_1_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_1_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_0_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_0_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - diff --git a/test/data/consolidate/8bins50overlap_dream_all.gff.out b/test/data/consolidate/8bins50overlap_dream_all.gff.out deleted file mode 100644 index aa899a4b..00000000 --- a/test/data/consolidate/8bins50overlap_dream_all.gff.out +++ /dev/null @@ -1,336 +0,0 @@ -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_7_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_7_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_6_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_6_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_5_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_5_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_4_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_4_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_3_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_3_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_2_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_2_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_1_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_1_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_0_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_0_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - diff --git a/test/data/consolidate/cli_test_input.sh b/test/data/consolidate/cli_test_input.sh index a535f9e9..b07bd590 100755 --- a/test/data/consolidate/cli_test_input.sh +++ b/test/data/consolidate/cli_test_input.sh @@ -16,21 +16,22 @@ query_file=query_e${errRate}.fasta for minLen in 50 do - stellar -e $errRate -l $minLen -v --suppress-runtime-printing -o ${minLen}overlap_full.gff $ref_file $query_file > /dev/null + stellar -e $errRate -l $minLen -v -o ${minLen}overlap_full.gff $ref_file $query_file > /dev/null for bin in 8 16 do - valik split $ref_file --reference-output ${bin}bins${minLen}overlap_reference_metadata.tsv \ - --segment-output ${bin}bins${minLen}overlap_segment_metadata.tsv \ + valik split $ref_file --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ + --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv \ --bins $bin --overlap $minLen - valik build --from-segments $ref_file --seg-path ${bin}bins${minLen}overlap_segment_metadata.tsv \ + valik build --from-segments $ref_file --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv \ --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ --window 15 --kmer 13 --output ${bin}index.ibf --size 10k errors=$(echo "($errRate*$minLen+0.5)/1;" | bc) valik search --index ${bin}index.ibf --query $query_file --pattern $minLen --error $errors \ - --output ${bin}bins${minLen}overlap_dream_all.gff --seg-path ${bin}bins${minLen}overlap_segment_metadata.tsv + --output ${bin}bins${minLen}overlap_dream_all.gff --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ + --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv rm ${bin}index.ibf rm ${bin}bins${minLen}overlap_segment_metadata.tsv diff --git a/test/data/create_output.sh b/test/data/create_output.sh index bc8d8eed..ce89ba53 100755 --- a/test/data/create_output.sh +++ b/test/data/create_output.sh @@ -23,4 +23,10 @@ echo "### Running valik search ###" echo "### Running valik consolidate ###" ./consolidate/cli_test_output.sh +export VALIK_STELLAR=stellar +export VALIK_MERGE=cat + +echo "### Running distributed DREAM-Stellar ###" +./dream/cli_test_output.sh + echo "### Finished ###" diff --git a/test/data/datasources.cmake b/test/data/datasources.cmake index 45d215c5..f37d9ce9 100644 --- a/test/data/datasources.cmake +++ b/test/data/datasources.cmake @@ -192,9 +192,6 @@ declare_datasource (FILE query.fq declare_datasource (FILE 16bins50overlap_dream_all.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_all.gff URL_HASH SHA256=e8f69893d25e86d51399b413a21d6885e4928c3bc1622244c789b30eb4016206) -declare_datasource (FILE 16bins50overlap_dream_all.gff.out - URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_all.gff.out - URL_HASH SHA256=6e469a784d3b6ce3e1ac3d60e7d504431fb4342e66d6b4e514f95e058496d5bb) declare_datasource (FILE 16bins50overlap_dream_consolidated.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_consolidated.gff URL_HASH SHA256=545709097b8e55a685c132933c3363558c62d4ed1b71892f13de61f35d81c39b) @@ -207,9 +204,6 @@ declare_datasource (FILE 50overlap_full.gff declare_datasource (FILE 8bins50overlap_dream_all.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_all.gff URL_HASH SHA256=8786f8b22517bc35b28e3a0406450e6d782a3c47d6ce2ebe4424b1f8a0382fe4) -declare_datasource (FILE 8bins50overlap_dream_all.gff.out - URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_all.gff.out - URL_HASH SHA256=c66dc056d5d490a611733cb1d7fae89d785f7fdcd5048e5bbd202eac4afec5e7) declare_datasource (FILE 8bins50overlap_dream_consolidated.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_consolidated.gff URL_HASH SHA256=545709097b8e55a685c132933c3363558c62d4ed1b71892f13de61f35d81c39b) @@ -222,3 +216,59 @@ declare_datasource (FILE multi_seq_ref.fasta declare_datasource (FILE query_e0.06.fasta URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/query_e0.06.fasta URL_HASH SHA256=b107238db9e0c0515b33fff570a787a86126789a0341bd2fdb52c6c607772f8d) + + +declare_datasource (FILE 16bins13window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff + URL_HASH SHA256=ae836328a087bfd08fbf03844798ef1f5dc8bfba52a188c08d35f1d755a99ac3) +declare_datasource (FILE 16bins13window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff.out + URL_HASH SHA256=beaacc34bd952d8c754f9ab2539bde5ca4683424ba131161c47e4bd0f5cc4f9b) +declare_datasource (FILE 16bins13window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window.ibf + URL_HASH SHA256=0875cd2d90d1320f93c575f3197e7879a5b8e51c5c11de76975561d30e4cf324) +declare_datasource (FILE 16bins15window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff + URL_HASH SHA256=3581ca9b126e98991372990e0599f956430332e09c17aa07e8b607639441fbc2) +declare_datasource (FILE 16bins15window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff.out + URL_HASH SHA256=27a5d4e972258d8c286aa78d5fce46d559f7263b4d5b2b32ee3ba65ff342e7bf) +declare_datasource (FILE 16bins15window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window.ibf + URL_HASH SHA256=7f1ce2bbdf8d657da29d39879ab23c68cb19dbe0b58c69a9c5a576f6528ad24c) +declare_datasource (FILE 4bins13window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff + URL_HASH SHA256=00466ddefd51171c59d7e98fab1c1735758ae741f55f2731223b683ba6d667ab) +declare_datasource (FILE 4bins13window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff.out + URL_HASH SHA256=7a9eb5a96dc362ca16b28475fda5000b2d60f065209f9653e42473faa58041b9) +declare_datasource (FILE 4bins13window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window.ibf + URL_HASH SHA256=b08ec3c196dc45faf73c24b86113e2c89adaf3d1844799d646e25dc0e77ac6bb) +declare_datasource (FILE 4bins15window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff + URL_HASH SHA256=00466ddefd51171c59d7e98fab1c1735758ae741f55f2731223b683ba6d667ab) +declare_datasource (FILE 4bins15window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff.out + URL_HASH SHA256=7a9eb5a96dc362ca16b28475fda5000b2d60f065209f9653e42473faa58041b9) +declare_datasource (FILE 4bins15window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window.ibf + URL_HASH SHA256=8b584d0e55043b3cc0835674dc83f5a7db6143645071f49973a1d085ac4fb919) +declare_datasource (FILE dummy_reads.fastq + URL ${CMAKE_SOURCE_DIR}/test/data/dream/dummy_reads.fastq + URL_HASH SHA256=f1aa9ca0fb0b87393923848f0389cc3fb5cfd4841566afaf72e6c55829b64d73) +declare_datasource (FILE query.fastq + URL ${CMAKE_SOURCE_DIR}/test/data/dream/query.fastq + URL_HASH SHA256=f6df8e312ed67e8044ae2c495259f3bf1eff7a7293b33a6b2d05203218f9dc0c) +declare_datasource (FILE ref.fasta + URL ${CMAKE_SOURCE_DIR}/test/data/dream/ref.fasta + URL_HASH SHA256=30ed460bfe4838a6ce3e97dff22b42c9312b0c801c8d671dbe82f6abca265f4e) +declare_datasource (FILE ref_meta.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/ref_meta.txt + URL_HASH SHA256=cfaea330c4abde12e75cec5ae8b74ffd985d2b1d4ad1620b72e064f17488e1d5) +declare_datasource (FILE seg_meta150overlap16bins.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/seg_meta150overlap16bins.txt + URL_HASH SHA256=b19d2082c26c72fa58af17d1e0b8220dbf13f71a1aa0aafc145c76bfbd366d1e) +declare_datasource (FILE seg_meta150overlap4bins.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/seg_meta150overlap4bins.txt + URL_HASH SHA256=fb1ebef2e9d5a75272ec11f20d4b3a77b9b647bbdbabaf7c4d9f8cfbd2ff5f31) diff --git a/test/data/dream/16bins13window.ibf b/test/data/dream/16bins13window.ibf new file mode 100644 index 00000000..057c65be Binary files /dev/null and b/test/data/dream/16bins13window.ibf differ diff --git a/test/data/dream/16bins13window1error.gff b/test/data/dream/16bins13window1error.gff new file mode 100644 index 00000000..6ce8ddd1 --- /dev/null +++ b/test/data/dream/16bins13window1error.gff @@ -0,0 +1,102 @@ +chr1 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=15G,56C,113T +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=2M1I147M;mutations=3G,103G,124T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45A,105C,139C +chr3 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,61T,111G +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=19G,49T,67T +chr3 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=8A,16A,113C +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,61T,111G +chr2 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=2D150M;mutations=111G +chr2 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=150M2D;mutations=111G +chr1 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=45A,105C,139C +chr1 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=53C,61T,111G +chr1 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=2D150M;mutations=111G +chr1 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 2170 2300 98.4732 + . 2;seq2Range=17,147;eValue=2.60601e-60;cigar=131M;mutations=7C,51T +chr2 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=49T,121A,139G +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr3 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.85121e-68;cigar=1M1D148M;mutations=67A,141T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=1M1I148M;mutations=2G,103G,124T +chr1 Stellar eps-matches 974 1023 98 + . 2;seq2Range=9,58;eValue=2.35246e-20;cigar=50M;mutations=8A +chr1 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=2D150M;mutations=111G +chr3 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=72T,87T,90G +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,111G,120T +chr2 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=72T,87T,90G +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 2482 2631 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=9C,79G,94G +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr2 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=110G,115G,123T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr3 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=110G,115G,123T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=14A,90G,124T +chr1 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=110G,115G,123T +chr1 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=68A,79T,142T +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr1 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=47C,72T,74C +chr2 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=68A,79T,142T +chr2 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=63C,113A,139G +chr2 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=68A,79T,142T +chr3 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=15G,56C,113T +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=69G,74A,92G +chr3 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=15G,56C,113T +chr3 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67A +chr3 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=69G,74A,92G diff --git a/test/data/dream/16bins13window1error.gff.out b/test/data/dream/16bins13window1error.gff.out new file mode 100644 index 00000000..23465312 --- /dev/null +++ b/test/data/dream/16bins13window1error.gff.out @@ -0,0 +1,1560 @@ +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_6_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_6_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_13_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_13_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_8_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_8_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_12_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_12_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 +WARNING: Non-unique query ids. Output can be ambiguous. + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_7_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_7_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_6_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_6_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 +WARNING: Non-unique query ids. Output can be ambiguous. + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_13_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_13_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_12_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_12_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_7_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_7_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_14_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_14_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + diff --git a/test/data/dream/16bins15window.ibf b/test/data/dream/16bins15window.ibf new file mode 100644 index 00000000..aec9193d Binary files /dev/null and b/test/data/dream/16bins15window.ibf differ diff --git a/test/data/dream/16bins15window1error.gff b/test/data/dream/16bins15window1error.gff new file mode 100644 index 00000000..be5b8068 --- /dev/null +++ b/test/data/dream/16bins15window1error.gff @@ -0,0 +1,104 @@ +chr1 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=15G,56C,113T +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=2M1I147M;mutations=3G,103G,124T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=8A,16A,113C +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45A,105C,139C +chr3 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,61T,111G +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=19G,49T,67T +chr3 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr2 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,61T,111G +chr2 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=2D150M;mutations=111G +chr2 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=150M2D;mutations=111G +chr1 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=45A,105C,139C +chr1 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=53C,61T,111G +chr1 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=2D150M;mutations=111G +chr1 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 2170 2300 98.4732 + . 2;seq2Range=17,147;eValue=2.60601e-60;cigar=131M;mutations=7C,51T +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr3 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.85121e-68;cigar=1M1D148M;mutations=67A,141T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45G,67C,124C +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=1M1I148M;mutations=2G,103G,124T +chr1 Stellar eps-matches 974 1023 98 + . 2;seq2Range=9,58;eValue=2.35246e-20;cigar=50M;mutations=8A +chr1 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr3 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=2D150M;mutations=111G +chr3 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=72T,87T,90G +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=49T,121A,139G +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=72T,87T,90G +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 2482 2631 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=9C,79G,94G +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr2 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=110G,115G,123T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr3 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=110G,115G,123T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=14A,90G,124T +chr1 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=110G,115G,123T +chr1 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=68A,79T,142T +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr1 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=47C,72T,74C +chr2 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=68A,79T,142T +chr2 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=63C,113A,139G +chr2 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=68A,79T,142T +chr3 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=15G,56C,113T +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=69G,74A,92G +chr3 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=15G,56C,113T +chr3 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67A +chr3 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=69G,74A,92G diff --git a/test/data/dream/16bins15window1error.gff.out b/test/data/dream/16bins15window1error.gff.out new file mode 100644 index 00000000..350d6269 --- /dev/null +++ b/test/data/dream/16bins15window1error.gff.out @@ -0,0 +1,1601 @@ +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_6_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_6_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_13_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_13_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_8_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_8_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_12_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_12_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 +WARNING: Non-unique query ids. Output can be ambiguous. + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_7_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_7_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_6_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_6_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 +WARNING: Non-unique query ids. Output can be ambiguous. + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_5_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_5_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_13_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_13_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_12_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_12_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_11_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_11_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_8_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_8_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_7_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_7_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_14_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_14_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_9_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_9_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_4_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_4_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_10_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_10_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + diff --git a/test/data/dream/4bins13window.ibf b/test/data/dream/4bins13window.ibf new file mode 100644 index 00000000..b80f0588 Binary files /dev/null and b/test/data/dream/4bins13window.ibf differ diff --git a/test/data/dream/4bins13window1error.gff b/test/data/dream/4bins13window1error.gff new file mode 100644 index 00000000..21e1814a --- /dev/null +++ b/test/data/dream/4bins13window1error.gff @@ -0,0 +1,80 @@ +chr2 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=15G,56C,113T +chr2 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=68A,79T,142T +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=1M1I148M;mutations=2G,103G,124T +chr1 Stellar eps-matches 974 1023 98 + . 2;seq2Range=9,58;eValue=2.35246e-20;cigar=50M;mutations=8A +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=19G,49T,67T +chr3 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=8A,16A,113C +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=32G,74C,101C +chr3 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=45A,105C,139C +chr1 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=2D150M;mutations=111G +chr1 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46G,110T,119T +chr2 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=2D150M;mutations=111G +chr2 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 2170 2300 98.4732 + . 2;seq2Range=17,147;eValue=2.60601e-60;cigar=131M;mutations=7C,51T +chr2 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.85121e-68;cigar=1M1D148M;mutations=67A,141T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=2D150M;mutations=111G +chr3 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 2482 2631 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=9C,79G,94G +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,111G,120T +chr1 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=3G,32G,69C +chr1 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=47C,72T,74C +chr1 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=110G,115G,123T +chr1 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=53C,61T,111G +chr2 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=47C,72T,74C +chr2 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=110G,115G,123T +chr2 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,61T,111G +chr3 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=110G,115G,123T +chr3 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,61T,111G +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=63C,113A,139G +chr2 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=69G,74A,92G +chr2 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=14A,90G,124T +chr3 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=69G,74A,92G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr1 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=15G,56C,113T +chr1 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=68A,79T,142T +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr3 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=15G,56C,113T +chr3 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=68A,79T,142T +chr3 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67A diff --git a/test/data/dream/4bins13window1error.gff.out b/test/data/dream/4bins13window1error.gff.out new file mode 100644 index 00000000..a55b86d1 --- /dev/null +++ b/test/data/dream/4bins13window1error.gff.out @@ -0,0 +1,1107 @@ +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_8.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_8.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_9.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_9.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + diff --git a/test/data/dream/4bins15window.ibf b/test/data/dream/4bins15window.ibf new file mode 100644 index 00000000..2be7edb5 Binary files /dev/null and b/test/data/dream/4bins15window.ibf differ diff --git a/test/data/dream/4bins15window1error.gff b/test/data/dream/4bins15window1error.gff new file mode 100644 index 00000000..21e1814a --- /dev/null +++ b/test/data/dream/4bins15window1error.gff @@ -0,0 +1,80 @@ +chr2 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=15G,56C,113T +chr2 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=68A,79T,142T +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=1M1I148M;mutations=2G,103G,124T +chr1 Stellar eps-matches 974 1023 98 + . 2;seq2Range=9,58;eValue=2.35246e-20;cigar=50M;mutations=8A +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=19G,49T,67T +chr3 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=8A,16A,113C +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=32G,74C,101C +chr3 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=45A,105C,139C +chr1 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=2D150M;mutations=111G +chr1 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46G,110T,119T +chr2 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=2D150M;mutations=111G +chr2 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 2170 2300 98.4732 + . 2;seq2Range=17,147;eValue=2.60601e-60;cigar=131M;mutations=7C,51T +chr2 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.85121e-68;cigar=1M1D148M;mutations=67A,141T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=2D150M;mutations=111G +chr3 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 2482 2631 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=9C,79G,94G +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,111G,120T +chr1 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=3G,32G,69C +chr1 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=47C,72T,74C +chr1 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=110G,115G,123T +chr1 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=53C,61T,111G +chr2 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=47C,72T,74C +chr2 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=110G,115G,123T +chr2 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,61T,111G +chr3 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=110G,115G,123T +chr3 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,61T,111G +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=63C,113A,139G +chr2 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=69G,74A,92G +chr2 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=14A,90G,124T +chr3 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=69G,74A,92G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr1 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=15G,56C,113T +chr1 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=68A,79T,142T +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr3 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=15G,56C,113T +chr3 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=68A,79T,142T +chr3 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67A diff --git a/test/data/dream/4bins15window1error.gff.out b/test/data/dream/4bins15window1error.gff.out new file mode 100644 index 00000000..a55b86d1 --- /dev/null +++ b/test/data/dream/4bins15window1error.gff.out @@ -0,0 +1,1107 @@ +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_8.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_8.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_9.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_9.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + diff --git a/test/data/dream/cli_test_input.sh b/test/data/dream/cli_test_input.sh new file mode 100755 index 00000000..66cfd148 --- /dev/null +++ b/test/data/dream/cli_test_input.sh @@ -0,0 +1,44 @@ +#!/bin/bash +cd dream +set -Eeuo pipefail + +#----------- Simulate chromosomes of various lengths ----------- + +SEED=${1} + +error_rate=0.025 +read_length=150 +read_dir=reads_$read_length +mkdir -p $read_dir +read_count=10 + +i=1 +for length in 1023 2300 3030 +do + echo "Simulating chromosome with length $length" + chr_out="chr"$i".fasta" + mason_genome -l $length -o $chr_out -s $SEED &>/dev/null + + sed -i "s/^>.*$/>chr$i/g" $chr_out + let i=i+1 + + #----------- Sample reads from reference sequence ----------- + echo "Generating $read_count reads of length $read_length with error rate $error_rate" + generate_local_matches \ + --output $read_dir \ + --max-error-rate $error_rate \ + --num-matches $read_count \ + --min-match-length $read_length \ + --max-match-length $read_length \ + --verbose-ids \ + --reverse \ + --ref-len $length \ + --seed $SEED \ + $chr_out +done + +cat chr*.fasta > ref.fasta +rm chr*.fasta + +cat $read_dir/chr*.fastq > query.fastq +rm -r $read_dir diff --git a/test/data/dream/cli_test_output.sh b/test/data/dream/cli_test_output.sh new file mode 100755 index 00000000..6c3c450e --- /dev/null +++ b/test/data/dream/cli_test_output.sh @@ -0,0 +1,56 @@ +#!/bin/bash +cd dream +set -Eeuo pipefail + +if [ -z "${VALIK_TMP}" ]; then + echo "no VALIK_TMP folder given" + exit 127 +fi + +mkdir -p $VALIK_TMP + +#----------- Index and search the reference genome ----------- + +# Split parameters +seg_overlap="150" # how much adjacent segments overlap + +# Build parameters +k=13 +ibf_size="32k" + +# Search parameters +pattern=50 # min local match length +pat_overlap=49 # how much adjacent patterns overlap + +ref_input="ref.fasta" +query="query.fastq" +for b in 4 16 +do + echo "Splitting the genome into $b segments that overlap by $seg_overlap" + ref_meta="ref_meta.txt" + seg_meta="seg_meta"$seg_overlap"overlap"$b"bins.txt" + valik split "$ref_input" --overlap "$seg_overlap" --bins "$b" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + + for w in 13 15 + do + echo "Creating IBF for w=$w and k=$k where segments overlap by $seg_overlap" + index=$b"bins"$w"window.ibf" + valik build "$ref_input" --kmer "$k" --window "$w" --size "$ibf_size" --output "$index" --from-segments --ref-meta "$ref_meta" --seg-meta "$seg_meta" + + for e in 1 + do + echo "Searching IBF with $e errors" + dist_out=$b"bins"$w"window"$e"error.gff" + local_out="local"$b"bins"$w"window"$e"error.gff" + valik search --index "$index" --query "$query" --output "$dist_out" --error "$e" --pattern "$pattern" --overlap "$pat_overlap" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + #valik search --shared-memory --index "$index" --query "$query" --output "$local_out" --error "$e" --pattern "$pattern" --overlap "$pat_overlap" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + done + + rm $VALIK_TMP/* + done +done + +#stellar_out="stellar.gff" +#stellar ref.fasta query.fasta -e 0.02 -l 50 -o $stellar_out + +rm -r $VALIK_TMP diff --git a/test/data/dream/dummy_reads.fastq b/test/data/dream/dummy_reads.fastq new file mode 100644 index 00000000..292cc6e8 --- /dev/null +++ b/test/data/dream/dummy_reads.fastq @@ -0,0 +1,24 @@ +@0 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII \ No newline at end of file diff --git a/test/data/dream/query.fastq b/test/data/dream/query.fastq new file mode 100644 index 00000000..032e427f --- /dev/null +++ b/test/data/dream/query.fastq @@ -0,0 +1,120 @@ +@0 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TGTTGTGGGAGGCTGGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGCACATAGTGTATGGAAAGTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATTTAAGTTGTATCGTGGCTATGAGGGTAGTCGCCGGAGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=10,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +AGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAACCCAAGTGAGTTCACGGGCTCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCACAGGATCTCACGTGCGGACAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=653,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GGCCTGACCTAACCTGTATAAGCTGTATCGTGGCTATGAGGGTAGTCGCCGGAGAAAACGTATGCTAACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAGCCGGGCCTAGGGGTTCACCGTAACGGATTAGTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=282,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGCTTCATGGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGAGAGGACGCCCAATTCTGCCAAGGATGATTTAGGGTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=611,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CGGACATAGTGTATGGAAAGTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTGTCGTAGCTATGAGGGTAGTCGCGGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=379,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CTACTCTGGATCAATTTCCATACACTATGTCCGTGGTAGCCTGGATCACAGCTCGCGCGCTGCTTAAGACCGAGCCTCCCACAACAGGCGTAAGAGGTGTAATGGTTGACCACCCTTTTTCAGTGAGAGTCATACGATTGCGGTGGGGTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=752,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GGCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACCCCCGCCGTGTTAGGGGAAGCCAAATGCACGTACAAGCATTCCTCCACGATGACTGACCTAAGAGATGTCCGCCCGTGAGATCCTGTGCTTTGCGCTGGTCGAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=318,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CCGGCGACTACCCTCATAGCCACGATACAACTTATATAGGTTAGGTCAGGCCCCCGCGGGTCTACTCTGGATCACTTTCCATACACTATGTCCGTGGTAGCCTGGATCAGAGCTGGCGCGCTTCTTAAGACCGAGCCTCCCACAACAGGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=693,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CTCGCCCATTTGATCGTTGCGGTCATTCGTAGAGAGCGTGAAATGCGCTATGCTCTTCGGTCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACGCCCGGCGTGTTAGGGGAAGCCAAAAGAACGTACAAGCATTCCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=121,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TACCTGGTAAACAACCACGCCTGCGAAAACAGATGTAGGCCCGCAGCGGAGGGGTGACGACTTGAGTTCTATCAGGAAATCATCGCTGGATTTGAATTTGACTAATCCGTTACGGTGAACCCCTAGGCCCGGCTAGCTTACAACCGATCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@0 reverse,start_position=1374,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +ACGGGAGCCTAGGCAATCCCGACGTCCCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTCGCAGCTTTACAGTTCTTCTCCGCGGACGGGCAGAGTGGTTTTAAGACCAGGGTCTATGCACAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=839,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AGGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTCATCTCTATTAGTCACGGGCAGCATGGTGTCAGCGAACCGCGCGTCTCCTAATTTCTGGTCTACCGATTTAGCCCCGGCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=965,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +TGGTCTAACGATTTAACCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTAACTAAAAACGCCACGGACCTGGCTAACCTTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=1140,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GGGTGGTAAACATAGATTGTATATAGTCAACGACATACACTCATTATTTTGCAATTGCGGCATCTCTACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGCGCTACGCTGGTTTGGAAGACACTTAGTATC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=93,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +ACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGAACAGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTCCTTTTGGCTTCCCCTAACACGGCGGGCGTCTCCCGTACGTATCCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=1051,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AAGATCGTTTTCGCCATCCGGGAAAACTAATGAAGACATAGTTGAGATGCCGCAATTGCATAATAATGAGTGTCTGTCGTTGACTATATAGAATCTATGTCTACCACCCACATAATACTCTGGCAGTATGGGGAATCCGAAGCGCCCTCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=197,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GCATCGGAGTTCGTGGGAGGGTCCACAGTCTTAACAGGAAGTAGCGATTCTGATCCACGTACCTGCGTAAGGTCTAGCAAGATCCTTAGGCCGATGAGGAAGGTTGTGAGTTTTAAATCCAGGGGTATAACCCCTACTACCACTGCTGCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=1023,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +TTCCAAACCAGCGTAGCGCTTCTCCTGTAAGATCGTTTTCGCCAGCCGGGAAAACTAATTAAGACACAGTTGAGATGCCGCAATTGCATAATAATGAGTGTATGTCGTTGACTATATAGAATCCATGTTTACCACCCACATAATACTCTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=1642,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AGGCCCCCGCGGGTCTACTCTGGATCACTTTCCATACACTATGTCGGTGGTAGCCTGGATCACAGCTCGCGCGCTGCTTAAGACCGAGCCTCCCACAACAGGCATAAGATGTGTAATGTTTGACCACCCTTTTTCAGCGAGAGTCATACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=2014,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GCGCTATGTTCTTCGGGCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACGCCCGCCGTGTTAGGGGAAGCCAAAAGAACGTACAAGCATTCCTCCACGGTGACTGACCTAAGAGATGTCCGCCCGTGAGATCCTGTGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@0 reverse,start_position=1841,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +GCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTCATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTCGAAGACGGACGAGGTATGCAGACATAGCAGCAGTGGTAGTAGGGGTTAAACCCCTGGATTTAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=1124,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TTCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTATGAAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGCGCTACTCTGGTTTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=2153,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +AGCTGAGCGGTTCAGGCAGAGTCACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCTAACAGCTAAGGATTCCTGGTCCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCCCAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=2146,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +CCTTGAAAGCTGAGCGGTTCAGACAGAGTAACTACATCTTATATGTAATCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGTCCCTGGCACGGATATAGATCACAATCAGGAATTCCCTCCTAAGTGCCCGCCCGGTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=857,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TGGGGGCCTACATCTGTTTTCGCAGGCGTGGGTGTTTACCAGGTATGGTGCTCATCTCTATTAGTCACCGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTAGCCCCGGCAAATAACTTTGGATTGTGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=1106,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TTTTTCTTACGCCGAACACTAAACCGCCACGCATGACACTGACTTTCCAGGTGATCTCCGAGTTCAATCCGAGCATGCAGTGCGTCTTTCCAGTGTGAGACGGTCATAACTGTACGGAAAAGGCTTACCTTGATAGATGGGAAGAGTAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=2867,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +GAGATGTCCGCCCGTGAGATCCTGTGCTTTGCGCTGGTCGAGTGAGTGTGCACCATCCTCCGATGTCTGTCTAGCGCCCGTGAACATACGTGGGTTGGCACCCAACCGCAGGATCTGATGTACCTTTGCATGCAGAGCTTATGCTTCCAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=1280,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TCCAGTTAGTGCGCGACACTTTTCCCTGATTGGCAGTTCGGCTTATCACTATTCGTCAAGAATCGGCACATTTGATTCTCCCGCGCCATAAGACTGGGTTTTCCAAGTCTACTATGGAGTAGGTACATCGGAGGTTGTCCGCTCAGCGGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=1878,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TGTTTACCACCCACATAATACTCTGGCAGTATGGGGAATCCGAAGCGCCCTCCAATGTTATAGTCAACAACGAATGTTAGCCAGGTCCGTGGCGTTTTTAGTTAGGAGCCGCACGGCAGTGCCCGTCAGTTCTGGCACTCTCCAACCACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=399,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +AACTTCAGCAACACTAATAGGCACCATTGGCAATGCATCGGTGCCCAGACTAGTTTCACTGTGGATCCTGTATCATTCGCCGTCGCGTCAAAAGTCGTTTATAACCGACCCATAACTATGGTGCTTAGACCGGACGACGCCGGGATCAAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII diff --git a/test/data/dream/ref.fasta b/test/data/dream/ref.fasta new file mode 100644 index 00000000..7fee5ae9 --- /dev/null +++ b/test/data/dream/ref.fasta @@ -0,0 +1,95 @@ +>chr1 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAA +>chr2 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTA +ACTAAAAACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCC +ATACTGCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTAT +GCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGC +GCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGGTCAACGGGCGCCGTCAA +AGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATCCCGACGTC +CCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTA +CAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTA +CTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGC +GGACAACCTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAA +TGTGCCGATTCTTGACGAATAGTTCTAAGCCGAACTGCCAATCAGGGAAAAGTGTCGCGCACTAACTGGA +GCTGAAACCGCCAATAGTGTCTAAGTTACTCTTCCCATCTATCAAGGTAAGCCTTTTCCGTACAGTTATG +ACCATCTCACACTGGAAAGACGCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTC +ATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTGGAAGACGGACGAGGTATGCAGACATTGCAGCA +GTGGTAGTAGGGGTTATACCCCTGGATTTAAAACTCACAACCTTCCTCATAGGCCTAAGGATCTTGCTAG +ACCTTAAGCAGGTACGTGGATCAGAATCGCTACTTCCTGTTAAGACTGTGGACCCTCCCACAAACTCCGA +TGCGAGCTAGGACGTCTTTAGCTCAGCTTGAGAATACTCCTATTTGCCTTGAAAGCTGAGCGGTTCAGAC +AGAGTAACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGT +CCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCC +>chr3 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTA +ACTAAAAACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCC +ATACTGCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTAT +GCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGC +GCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGGTCAACGGGCGCCGTCAA +AGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATCCCGACGTC +CCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTA +CAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTA +CTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGC +GGACAACCTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAA +TGTGCCGATTCTTGACGAATAGTTCTAAGCCGAACTGCCAATCAGGGAAAAGTGTCGCGCACTAACTGGA +GCTGAAACCGCCAATAGTGTCTAAGTTACTCTTCCCATCTATCAAGGTAAGCCTTTTCCGTACAGTTATG +ACCATCTCACACTGGAAAGACGCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTC +ATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTGGAAGACGGACGAGGTATGCAGACATTGCAGCA +GTGGTAGTAGGGGTTATACCCCTGGATTTAAAACTCACAACCTTCCTCATAGGCCTAAGGATCTTGCTAG +ACCTTAAGCAGGTACGTGGATCAGAATCGCTACTTCCTGTTAAGACTGTGGACCCTCCCACAAACTCCGA +TGCGAGCTAGGACGTCTTTAGCTCAGCTTGAGAATACTCCTATTTGCCTTGAAAGCTGAGCGGTTCAGAC +AGAGTAACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGT +CCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCCCACACTCTGT +GAGACTACGTGCGCGTGTAGTATCGTGAGGTCCGCGGTGGAAAAGGGTTTGGCACTTACTACTCAGTGAC +CGTATACACGGAGATTCGCACTGATGTGGAATATGAAATCCCACATCCCCTGAGAATTTCGAATCTGAGG +ATGAGTATATGCCTCGATGTAGGCCAGGAGCATTGATCCCGGCGTCGTCCGGTCTAAGCACCATAGTTAT +GGGTCGGTTATAAACGAATTTTGACGCGACGGGGAATGATACAGGATCCACAGTGAAACTAGTCTGGGCA +CCGATGCATTGCCAATGGTGCCTATTAGTGTTCCTGAAGTTGACTACAGTCCGTACCTCAGTATAGCGCT +GGTTACTAGTAGCGAAGTTGAGATTGTAGCTCGTACTCCAATGACCACCCGAGGGGGTGGTGCAATGTGC +AGGTAGGGGTAGGTTCCTGTAGTTCGGAGGTCAACCTCTTGTTGACGTCTGATGCGAGCCTGACTAAAAT +GCGCTTCTTCACTTTTGTTCGTATAGTCACTATATTCGCGAAACCGTCGCTTTTATTATAGACGGCCTAC +TTCTTTGACCGAGCCTCATAGTCTGCACTCGGGACGAAACTAACGGCTGTTCCACTCATGACCTACGCGC +CTGAGTGATCAAATAATCAAAAGAATGCGCCGCTATATGTAGGGGGCCCATGTATTGGCTGACTTTGAAA +ACACTCTGACACGAACTTGA diff --git a/test/data/dream/ref_meta.txt b/test/data/dream/ref_meta.txt new file mode 100644 index 00000000..5dbfaa67 --- /dev/null +++ b/test/data/dream/ref_meta.txt @@ -0,0 +1,3 @@ +chr1 0 1023 +chr2 1 2300 +chr3 2 3030 diff --git a/test/data/dream/seg_meta150overlap16bins.txt b/test/data/dream/seg_meta150overlap16bins.txt new file mode 100644 index 00000000..9776fd3d --- /dev/null +++ b/test/data/dream/seg_meta150overlap16bins.txt @@ -0,0 +1,16 @@ +0 0 0 492 +1 0 342 492 +2 0 684 339 +3 1 0 534 +4 1 384 534 +5 1 768 534 +6 1 1152 534 +7 1 1536 534 +8 1 1920 380 +9 2 0 583 +10 2 433 583 +11 2 866 583 +12 2 1299 583 +13 2 1732 583 +14 2 2165 583 +15 2 2598 432 diff --git a/test/data/dream/seg_meta150overlap4bins.txt b/test/data/dream/seg_meta150overlap4bins.txt new file mode 100644 index 00000000..df37c68b --- /dev/null +++ b/test/data/dream/seg_meta150overlap4bins.txt @@ -0,0 +1,4 @@ +0 0 0 1023 +1 1 0 2300 +2 2 0 1666 +3 2 1516 1514 diff --git a/test/data/simulate_input.sh b/test/data/simulate_input.sh index 5caeb858..dab9acc1 100755 --- a/test/data/simulate_input.sh +++ b/test/data/simulate_input.sh @@ -39,4 +39,6 @@ done ./search/cli_test_input.sh $SEED $BIN_NUMBER $HAPLOTYPE_COUNT +./dream/cli_test_input.sh $SEED + ./consolidate/cli_test_input.sh diff --git a/test/data/split/api_test_input.sh b/test/data/split/api_test_input.sh index dbd365a5..e9624d0c 100755 --- a/test/data/split/api_test_input.sh +++ b/test/data/split/api_test_input.sh @@ -26,6 +26,6 @@ do cat chr*.fasta > ${out_dir}/ref.fasta rm chr*.fasta - valik split ${out_dir}/ref.fasta --overlap ${overlap} --bins ${bins} --reference-output ${out_dir}/reference_metadata.txt --segment-output ${out_dir}/reference_segments.txt + valik split ${out_dir}/ref.fasta --overlap ${overlap} --bins ${bins} --ref-meta ${out_dir}/reference_metadata.txt --seg-meta ${out_dir}/reference_segments.txt done done diff --git a/test/data/split/cli_test_input.sh b/test/data/split/cli_test_input.sh index 9870ea5c..ffb63c9d 100755 --- a/test/data/split/cli_test_input.sh +++ b/test/data/split/cli_test_input.sh @@ -43,6 +43,7 @@ generate_local_matches \ --min-match-length $read_length \ --max-match-length $read_length \ --verbose-ids \ + --ref-len $ref_len \ --seed $SEED \ $ref_out diff --git a/test/data/update_datasources.sh b/test/data/update_datasources.sh index 34330d81..5b3bc1f8 100755 --- a/test/data/update_datasources.sh +++ b/test/data/update_datasources.sh @@ -118,3 +118,20 @@ do echo -n $sha >> ../datasources.cmake echo ")" >> ../datasources.cmake done + +echo -e "\n" >> ../datasources.cmake + +cd ../dream + +for file in * +do + [[ -d $file ]] && continue # skip folders + [[ $file == *.sh ]] && continue + echo -n "declare_datasource (FILE ${file} + URL \${CMAKE_SOURCE_DIR}/test/data/dream/${file} + URL_HASH SHA256=" >> ../datasources.cmake + + sha=($(shasum -a 256 $file)) + echo -n $sha >> ../datasources.cmake + echo ")" >> ../datasources.cmake +done \ No newline at end of file