From 65b091da5bac4cec21c0b67f3690a57b143d626e Mon Sep 17 00:00:00 2001 From: Evelin Aasna Date: Tue, 4 Jul 2023 15:48:28 +0200 Subject: [PATCH] Call Stellar search functions (#90) * Add stellar3 submodule * Add seqan2 submodule * Update lib/stellar3 (seqan2 namespace) * Load Stellar3 database sequences * Load database sequences from all reference files * Add flag to toggle shared memory vs distributed (default) execution * Each threads imports cart query sequences * sharg::parser instead of seqan3::argument_parser * Workaround for g++-10 template deduction * [FIX] error handling; lambda with implicit void return type * Environment variable parsing as struct * Supress stellar3 warnings * Overwrite (not append) stellar search output files * Compile stellar diagnostics * Stream stellar diagnostics * Share databaseIDMap between threads * Test data for DREAM-Stellar CLI tests * [FIX] empty output when no valik matches found * [TEST] DREAM-Stellar output shared memory vs distributed * [FIX] always write out matches * Compare DREAM-Stellar matches * Launch Stellar3 search * Calculate Stellar qGram length * Refactor search time printing * Pump stellar version * Stellar search reverse strand * Update CLI test suit after raptor_data_simulation update * Delete stellar_call.hpp * Apply suggestions from code review * Pump lib/stellar3 version --- .gitmodules | 9 + CMakeLists.txt | 7 +- .../argument_parsing/consolidate.hpp | 4 +- .../utilities/consolidate/stellar_match.hpp | 2 +- include/valik/argument_parsing/build.hpp | 4 +- include/valik/argument_parsing/search.hpp | 4 +- include/valik/argument_parsing/shared.hpp | 17 +- include/valik/argument_parsing/split.hpp | 4 +- include/valik/argument_parsing/top_level.hpp | 2 +- include/valik/argument_parsing/validators.hpp | 18 +- include/valik/index.hpp | 12 +- include/valik/search/env_var_pack.hpp | 71 + .../valik/search/search_time_statistics.hpp | 19 +- include/valik/shared.hpp | 47 +- include/valik/valik.hpp | 14 +- lib/raptor_data_simulation | 2 +- lib/seqan | 1 + lib/sharg | 1 + lib/stellar3 | 1 + src/CMakeLists.txt | 4 + src/argument_parsing/build.cpp | 101 +- src/argument_parsing/consolidate.cpp | 33 +- src/argument_parsing/search.cpp | 215 ++- src/argument_parsing/shared.cpp | 6 +- src/argument_parsing/split.cpp | 52 +- src/argument_parsing/top_level.cpp | 2 +- src/valik_main.cpp | 4 +- src/valik_search.cpp | 411 +++-- test/cli/CMakeLists.txt | 20 + test/cli/cli_test.hpp | 29 +- test/cli/dream_test.cpp | 100 + test/cli/valik_options_test.cpp | 10 +- test/cli/valik_test.cpp | 3 +- .../16bins50overlap_dream_all.gff.out | 672 ------- .../8bins50overlap_dream_all.gff.out | 336 ---- test/data/consolidate/cli_test_input.sh | 11 +- test/data/create_output.sh | 6 + test/data/datasources.cmake | 62 +- test/data/dream/16bins13window.ibf | Bin 0 -> 32891 bytes test/data/dream/16bins13window1error.gff | 102 ++ test/data/dream/16bins13window1error.gff.out | 1560 ++++++++++++++++ test/data/dream/16bins15window.ibf | Bin 0 -> 32891 bytes test/data/dream/16bins15window1error.gff | 104 ++ test/data/dream/16bins15window1error.gff.out | 1601 +++++++++++++++++ test/data/dream/4bins13window.ibf | Bin 0 -> 32891 bytes test/data/dream/4bins13window1error.gff | 80 + test/data/dream/4bins13window1error.gff.out | 1107 ++++++++++++ test/data/dream/4bins15window.ibf | Bin 0 -> 32891 bytes test/data/dream/4bins15window1error.gff | 80 + test/data/dream/4bins15window1error.gff.out | 1107 ++++++++++++ test/data/dream/cli_test_input.sh | 44 + test/data/dream/cli_test_output.sh | 56 + test/data/dream/dummy_reads.fastq | 24 + test/data/dream/query.fastq | 120 ++ test/data/dream/ref.fasta | 95 + test/data/dream/ref_meta.txt | 3 + test/data/dream/seg_meta150overlap16bins.txt | 16 + test/data/dream/seg_meta150overlap4bins.txt | 4 + test/data/simulate_input.sh | 2 + test/data/split/api_test_input.sh | 2 +- test/data/split/cli_test_input.sh | 1 + test/data/update_datasources.sh | 17 + 62 files changed, 7060 insertions(+), 1381 deletions(-) create mode 100644 include/valik/search/env_var_pack.hpp create mode 160000 lib/seqan create mode 160000 lib/sharg create mode 160000 lib/stellar3 create mode 100644 test/cli/dream_test.cpp delete mode 100644 test/data/consolidate/16bins50overlap_dream_all.gff.out delete mode 100644 test/data/consolidate/8bins50overlap_dream_all.gff.out create mode 100644 test/data/dream/16bins13window.ibf create mode 100644 test/data/dream/16bins13window1error.gff create mode 100644 test/data/dream/16bins13window1error.gff.out create mode 100644 test/data/dream/16bins15window.ibf create mode 100644 test/data/dream/16bins15window1error.gff create mode 100644 test/data/dream/16bins15window1error.gff.out create mode 100644 test/data/dream/4bins13window.ibf create mode 100644 test/data/dream/4bins13window1error.gff create mode 100644 test/data/dream/4bins13window1error.gff.out create mode 100644 test/data/dream/4bins15window.ibf create mode 100644 test/data/dream/4bins15window1error.gff create mode 100644 test/data/dream/4bins15window1error.gff.out create mode 100755 test/data/dream/cli_test_input.sh create mode 100755 test/data/dream/cli_test_output.sh create mode 100644 test/data/dream/dummy_reads.fastq create mode 100644 test/data/dream/query.fastq create mode 100644 test/data/dream/ref.fasta create mode 100644 test/data/dream/ref_meta.txt create mode 100644 test/data/dream/seg_meta150overlap16bins.txt create mode 100644 test/data/dream/seg_meta150overlap4bins.txt diff --git a/.gitmodules b/.gitmodules index aa4c926d..bf13091e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -11,3 +11,12 @@ [submodule "lib/raptor_data_simulation"] path = lib/raptor_data_simulation url = git@github.com:eaasna/raptor_data_simulation.git +[submodule "lib/stellar3"] + path = lib/stellar3 + url = git@github.com:seqan/stellar3.git +[submodule "lib/seqan"] + path = lib/seqan + url = git@github.com:seqan/seqan.git +[submodule "lib/sharg"] + path = lib/sharg + url = https://github.com/seqan/sharg-parser.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c2d3eae..c3934893 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,11 @@ list (APPEND CMAKE_MODULE_PATH "${SEQAN3_CLONE_DIR}/test/cmake/") list (APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/test/cmake/") # Use ccache. -include (seqan_require_ccache) -seqan_require_ccache () +include (seqan3_require_ccache) +seqan3_require_ccache () + +# Dependency: Sharg. +find_package (Sharg REQUIRED PATHS lib/sharg/build_system) # Add the application. add_subdirectory (src) diff --git a/include/utilities/argument_parsing/consolidate.hpp b/include/utilities/argument_parsing/consolidate.hpp index 585bda78..0c74d49e 100644 --- a/include/utilities/argument_parsing/consolidate.hpp +++ b/include/utilities/argument_parsing/consolidate.hpp @@ -6,7 +6,7 @@ namespace valik::app { -void init_consolidation_parser(seqan3::argument_parser & parser, consolidation_arguments & arguments); -void run_consolidate(seqan3::argument_parser & parser); +void init_consolidation_parser(sharg::parser & parser, consolidation_arguments & arguments); +void run_consolidate(sharg::parser & parser); } // namespace valik::app diff --git a/include/utilities/consolidate/stellar_match.hpp b/include/utilities/consolidate/stellar_match.hpp index 6c453f03..7f7fdec7 100644 --- a/include/utilities/consolidate/stellar_match.hpp +++ b/include/utilities/consolidate/stellar_match.hpp @@ -64,7 +64,7 @@ struct stellar_match } } - std::string to_string() + std::string to_string() const { std::string match_str = dname; match_str += "\tStellar\teps-matches\t"; diff --git a/include/valik/argument_parsing/build.hpp b/include/valik/argument_parsing/build.hpp index 6df5a87b..9a363844 100644 --- a/include/valik/argument_parsing/build.hpp +++ b/include/valik/argument_parsing/build.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_build_parser(seqan3::argument_parser & parser, build_arguments & arguments); -void run_build(seqan3::argument_parser & parser); +void init_build_parser(sharg::parser & parser, build_arguments & arguments); +void run_build(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/search.hpp b/include/valik/argument_parsing/search.hpp index 916e3833..38ddbee2 100644 --- a/include/valik/argument_parsing/search.hpp +++ b/include/valik/argument_parsing/search.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_search_parser(seqan3::argument_parser & parser, search_arguments & arguments); -void run_search(seqan3::argument_parser & parser); +void init_search_parser(sharg::parser & parser, search_arguments & arguments); +void run_search(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/shared.hpp b/include/valik/argument_parsing/shared.hpp index 3427129e..bb10ebd7 100644 --- a/include/valik/argument_parsing/shared.hpp +++ b/include/valik/argument_parsing/shared.hpp @@ -6,20 +6,7 @@ namespace valik::app { -void init_shared_meta(seqan3::argument_parser & parser); -void try_parsing(seqan3::argument_parser & parser); - -template -void init_shared_options(seqan3::argument_parser & parser, arguments_t & arguments) -{ - static_assert(std::same_as || std::same_as); - - parser.add_option(arguments.threads, - '\0', - "threads", - "Choose the number of threads.", - seqan3::option_spec::standard, - positive_integer_validator{}); -} +void init_shared_meta(sharg::parser & parser); +void try_parsing(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/split.hpp b/include/valik/argument_parsing/split.hpp index 17dedd53..393936d8 100644 --- a/include/valik/argument_parsing/split.hpp +++ b/include/valik/argument_parsing/split.hpp @@ -5,7 +5,7 @@ namespace valik::app { -void init_split_parser(seqan3::argument_parser & parser, split_arguments & arguments); -void run_split(seqan3::argument_parser & parser); +void init_split_parser(sharg::parser & parser, split_arguments & arguments); +void run_split(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/top_level.hpp b/include/valik/argument_parsing/top_level.hpp index eb4766e6..b0937486 100644 --- a/include/valik/argument_parsing/top_level.hpp +++ b/include/valik/argument_parsing/top_level.hpp @@ -5,6 +5,6 @@ namespace valik::app { -void init_top_level_parser(seqan3::argument_parser & parser); +void init_top_level_parser(sharg::parser & parser); } // namespace valik::app diff --git a/include/valik/argument_parsing/validators.hpp b/include/valik/argument_parsing/validators.hpp index e3bfdb07..df593c46 100644 --- a/include/valik/argument_parsing/validators.hpp +++ b/include/valik/argument_parsing/validators.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace valik::app @@ -13,7 +13,7 @@ struct power_of_two_validator void operator() (option_value_type const & val) const { if (!std::has_single_bit(val)) - throw seqan3::validation_error{"The value must be a power of two."}; + throw sharg::validation_error{"The value must be a power of two."}; } static std::string get_help_page_message () @@ -39,7 +39,7 @@ class positive_integer_validator void operator()(option_value_type const & val) const { if (!is_zero_positive && !val) - throw seqan3::validation_error{"The value must be a positive integer."}; + throw sharg::validation_error{"The value must be a positive integer."}; } std::string get_help_page_message () const @@ -71,7 +71,7 @@ class size_validator void operator()(option_value_type const & cmp) const { if (!std::regex_match(cmp, expression)) - throw seqan3::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")}; + throw sharg::validation_error{seqan3::detail::to_string("Value ", cmp, " must be an integer followed by [k,m,g,t] (case insensitive).")}; } template @@ -105,7 +105,7 @@ class bin_validator void operator() (option_value_type const & values) const { if (values.empty()) - throw seqan3::validation_error{"The list of input files cannot be empty."}; + throw sharg::validation_error{"The list of input files cannot be empty."}; for (auto && value : values) { @@ -113,7 +113,7 @@ class bin_validator { sequence_file_validator(value); } - catch (seqan3::validation_error const & exception) + catch (sharg::validation_error const & exception) { if (value.extension() == ".minimiser") minimiser_file_validator(value); @@ -142,7 +142,7 @@ class bin_validator for (auto && value : values) if (is_minimiser_input != (value.extension() == ".minimiser")) - throw seqan3::validation_error{"You cannot mix sequence and minimiser files as input."}; + throw sharg::validation_error{"You cannot mix sequence and minimiser files as input."}; } std::string get_help_page_message() const @@ -184,10 +184,10 @@ class bin_validator } return result; }()}; - seqan3::input_file_validator<> minimiser_file_validator{{"minimiser"}}; + sharg::input_file_validator minimiser_file_validator{{"minimiser"}}; public: - seqan3::input_file_validator<> sequence_file_validator{{combined_extensions}}; + sharg::input_file_validator sequence_file_validator{{combined_extensions}}; }; } // namespace valik::app diff --git a/include/valik/index.hpp b/include/valik/index.hpp index 1b55a7f3..58066e5c 100644 --- a/include/valik/index.hpp +++ b/include/valik/index.hpp @@ -7,7 +7,7 @@ #pragma once -#include +#include #include #include <../lib/raptor/include/raptor/hierarchical_interleaved_bloom_filter.hpp> @@ -155,19 +155,19 @@ class valik_index if ((data_layout_mode == seqan3::data_layout::compressed && !compressed_) || (data_layout_mode == seqan3::data_layout::uncompressed && compressed_)) { - throw seqan3::argument_parser_error{"Data layouts of serialised and specified index differ."}; + throw sharg::validation_error{"Data layouts of serialised and specified index differ."}; } archive(bin_path_); archive(ibf_); } catch (std::exception const & e) { - throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}}; + throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}}; } } else { - throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE + throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE } } @@ -195,13 +195,13 @@ class valik_index // GCOVR_EXCL_START catch (std::exception const & e) { - throw seqan3::argument_parser_error{"Cannot read index: " + std::string{e.what()}}; + throw sharg::validation_error{"Cannot read index: " + std::string{e.what()}}; } // GCOVR_EXCL_STOP } else { - throw seqan3::argument_parser_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE + throw sharg::validation_error{"Unsupported index version. Check valik upgrade."}; // GCOVR_EXCL_LINE } } //!\endcond diff --git a/include/valik/search/env_var_pack.hpp b/include/valik/search/env_var_pack.hpp new file mode 100644 index 00000000..1b3f5752 --- /dev/null +++ b/include/valik/search/env_var_pack.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace valik +{ + +struct env_var_pack +{ + std::filesystem::path tmp_path; + std::string stellar_exec{"stellar"}; + std::string merge_exec{"cat"}; + + env_var_pack() + { + // the location of bin-query fasta files can be overwritten with an environment variable + if (auto ptr = std::getenv("VALIK_TMP"); ptr != nullptr) + { + tmp_path = std::string(ptr); + std::filesystem::file_status s = status(tmp_path); + std::filesystem::perms p = s.permissions(); + bool is_writable = (std::filesystem::perms::none != (p & std::filesystem::perms::owner_write)) | + (std::filesystem::perms::none != (p & std::filesystem::perms::group_write)) | + (std::filesystem::perms::none != (p & std::filesystem::perms::others_write)); + + if (!exists(tmp_path) | !is_directory(s) | !is_writable ) + throw std::runtime_error("Directory $VALIK_TMP=" + std::string(ptr) + " must exist and write permission must be granted"); + } + + else + tmp_path = create_temporary_path("valik/stellar_call_XXXXXX"); + + if (auto ptr = std::getenv("VALIK_STELLAR"); ptr != nullptr) + stellar_exec = std::string(ptr); + + if (auto ptr = std::getenv("VALIK_MERGE"); ptr != nullptr) + merge_exec = std::string(ptr); + } + + /* Creates a temporary folder in the temporary path of the OS + * + * \param name: a name with 'XXXXXX' at the end, e.g.: valik/call_XXXXXX + * \return returns the name with the 'XXXXXX' replaced and the directory created + * + * throws if any errors occurs + */ + static std::filesystem::path create_temporary_path(std::filesystem::path name) + { + if (!name.is_relative()) + { + throw std::runtime_error("Must be given a relative file"); + } + auto path = std::filesystem::temp_directory_path() / name; + auto path_str = path.native(); + create_directories(path.parent_path()); + auto str = std::vector(path_str.size()+1, '\0'); // Must include an extra character to include a 0 + std::copy_n(path_str.data(), path_str.size(), str.data()); + auto ptr = mkdtemp(str.data()); + if (!ptr) + { + throw std::runtime_error("Could not create temporary folder: " + path_str); + } + return str.data(); + } +}; + +} diff --git a/include/valik/search/search_time_statistics.hpp b/include/valik/search/search_time_statistics.hpp index 7a7f13a8..98f7edf8 100644 --- a/include/valik/search/search_time_statistics.hpp +++ b/include/valik/search/search_time_statistics.hpp @@ -35,10 +35,9 @@ struct search_time_statistics } }; -inline void write_time_statistics(search_time_statistics const & time_statistics, search_arguments const & arguments) +inline void write_time_statistics(search_time_statistics const & time_statistics, std::string const & time_file) { - std::filesystem::path file_path{arguments.out_file}; - file_path += ".time"; + std::filesystem::path file_path{time_file}; std::ofstream file_handle(file_path, std::ofstream::app); file_handle << "IBF I/O\tReads I/O\tPrefilter\tMin cart time\tAvg cart time\tMax cart time\tNr carts\n"; @@ -46,11 +45,15 @@ inline void write_time_statistics(search_time_statistics const & time_statistics << std::setprecision(2) << time_statistics.index_io_time << '\t' << time_statistics.reads_io_time << '\t' - << time_statistics.prefilter_time << '\t' - << time_statistics.get_cart_min() << '\t' - << time_statistics.get_cart_avg() << '\t' - << time_statistics.get_cart_max() << '\t' - << time_statistics.cart_processing_times.size() << '\n'; + << time_statistics.prefilter_time << '\t'; + if (!time_statistics.cart_processing_times.empty()) + { + file_handle << time_statistics.get_cart_min() << '\t' + << time_statistics.get_cart_avg() << '\t' + << time_statistics.get_cart_max() << '\t' + << time_statistics.cart_processing_times.size() << '\n'; + + } } diff --git a/include/valik/shared.hpp b/include/valik/shared.hpp index 8656feac..1976a0f8 100644 --- a/include/valik/shared.hpp +++ b/include/valik/shared.hpp @@ -8,6 +8,8 @@ #include +#include + namespace valik { @@ -65,8 +67,39 @@ struct build_arguments std::filesystem::path ref_meta_path{}; }; -struct search_arguments +struct minimiser_threshold_arguments +{ + virtual ~minimiser_threshold_arguments() = 0; // make an abstract base struct + + double tau{0.9999}; + double threshold{std::numeric_limits::quiet_NaN()}; + double p_max{0.15}; + double fpr{0.05}; + uint8_t errors{0}; + size_t pattern_size{}; + bool treshold_was_set{false}; + bool cache_thresholds{false}; + + protected: + // prevent creating, assigning or moving base struct instances + minimiser_threshold_arguments() = default; + minimiser_threshold_arguments(minimiser_threshold_arguments const&) = default; + minimiser_threshold_arguments(minimiser_threshold_arguments&&) = default; + minimiser_threshold_arguments& operator=(minimiser_threshold_arguments const&) = default; + minimiser_threshold_arguments& operator=(minimiser_threshold_arguments&&) = default; +}; + +inline minimiser_threshold_arguments::~minimiser_threshold_arguments() = default; + +struct search_arguments final : public minimiser_threshold_arguments { + ~search_arguments() override = default; + search_arguments() = default; + search_arguments(search_arguments const&) = default; + search_arguments(search_arguments&&) = default; + search_arguments& operator=(search_arguments const&) = default; + search_arguments& operator=(search_arguments&&) = default; + uint32_t window_size{23u}; seqan3::shape shape{seqan3::ungapped{20u}}; uint8_t shape_size{shape.size()}; @@ -80,16 +113,6 @@ struct search_arguments std::filesystem::path index_file{}; std::filesystem::path out_file{"search.gff"}; - // Related to thresholding - double tau{0.9999}; - double threshold{std::numeric_limits::quiet_NaN()}; - double p_max{0.15}; - double fpr{0.05}; - uint8_t errors{0}; - uint64_t pattern_size{}; - bool treshold_was_set{false}; - bool cache_thresholds{false}; - bool compressed{false}; bool write_time{false}; @@ -115,6 +138,8 @@ struct search_arguments std::filesystem::path seg_path{}; std::filesystem::path ref_meta_path{}; + bool shared_memory{false}; + }; } // namespace valik diff --git a/include/valik/valik.hpp b/include/valik/valik.hpp index 78e2b569..2090755b 100644 --- a/include/valik/valik.hpp +++ b/include/valik/valik.hpp @@ -1,17 +1,17 @@ #pragma once -#include +#include #include namespace valik::app { -void try_parsing(seqan3::argument_parser & parser); -void init_top_level_parser(seqan3::argument_parser & parser); -void run_split(seqan3::argument_parser & parser); -void run_build(seqan3::argument_parser & parser); -void run_search(seqan3::argument_parser & parser); -void run_consolidation(seqan3::argument_parser & parser); +void try_parsing(sharg::parser & parser); +void init_top_level_parser(sharg::parser & parser); +void run_split(sharg::parser & parser); +void run_build(sharg::parser & parser); +void run_search(sharg::parser & parser); +void run_consolidation(sharg::parser & parser); } // namespace valik::app diff --git a/lib/raptor_data_simulation b/lib/raptor_data_simulation index 1c0b309b..768f2e84 160000 --- a/lib/raptor_data_simulation +++ b/lib/raptor_data_simulation @@ -1 +1 @@ -Subproject commit 1c0b309bc485bc995fe71e07af070ce39126fc6e +Subproject commit 768f2e84f41d6fb348544d68006b661a6abc2120 diff --git a/lib/seqan b/lib/seqan new file mode 160000 index 00000000..8ce355dd --- /dev/null +++ b/lib/seqan @@ -0,0 +1 @@ +Subproject commit 8ce355dd960bbf7a5fa0292b49f7342f7e456da6 diff --git a/lib/sharg b/lib/sharg new file mode 160000 index 00000000..a59e3682 --- /dev/null +++ b/lib/sharg @@ -0,0 +1 @@ +Subproject commit a59e3682e8976f44f693f9f736b3f7b2761b9248 diff --git a/lib/stellar3 b/lib/stellar3 new file mode 160000 index 00000000..35c7f5dc --- /dev/null +++ b/lib/stellar3 @@ -0,0 +1 @@ +Subproject commit 35c7f5dc821e9f1ed8970b64e9daf08d9568e4fe diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bd7f9c4c..941d4835 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,8 +6,11 @@ cmake_minimum_required (VERSION 3.16) # Shared interface_ add_library ("${PROJECT_NAME}_interface" INTERFACE) target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE seqan3::seqan3) +target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE sharg::sharg) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../include) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/raptor/include) +target_include_directories ("${PROJECT_NAME}_interface" SYSTEM INTERFACE ../lib/stellar3/include) +target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/seqan/include) target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../lib/robin-hood-hashing/src/include) target_compile_options ("${PROJECT_NAME}_interface" INTERFACE "-pedantic" "-Wall" "-Wextra") @@ -29,6 +32,7 @@ add_library ("raptor_threshold" STATIC ../lib/raptor/src/threshold/precompute_threshold.cpp ) target_link_libraries ("raptor_threshold" PUBLIC "${PROJECT_NAME}_interface") + add_library ("${PROJECT_NAME}_search_lib" STATIC valik_search.cpp) target_link_libraries ("${PROJECT_NAME}_search_lib" PUBLIC "raptor_threshold") diff --git a/src/argument_parsing/build.cpp b/src/argument_parsing/build.cpp index 2bf26eb3..de75f374 100644 --- a/src/argument_parsing/build.cpp +++ b/src/argument_parsing/build.cpp @@ -6,67 +6,66 @@ namespace valik::app { -void init_build_parser(seqan3::argument_parser & parser, build_arguments & arguments) +void init_build_parser(sharg::parser & parser, build_arguments & arguments) { init_shared_meta(parser); - init_shared_options(parser, arguments); parser.add_positional_option(arguments.bin_file, - "File containing one file per line per bin when building from clustered sequences. " - "Input sequence file when building from overlapping segments.", - seqan3::input_file_validator{}); + sharg::config{.description = "File containing one file per line per bin when building from clustered sequences. " + "Input sequence file when building from overlapping segments.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.window_size, - '\0', - "window", - "Choose the window size.", - seqan3::option_spec::standard, - positive_integer_validator{}); + sharg::config{.short_id = '\0', + .long_id = "window", + .description = "Choose the window size.", + .validator = positive_integer_validator{}}); parser.add_option(arguments.kmer_size, - '\0', - "kmer", - "Choose the kmer size.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 32}); + sharg::config{.short_id = '\0', + .long_id = "kmer", + .description = "Choose the kmer size.", + .validator = sharg::arithmetic_range_validator{1, 32}}); parser.add_option(arguments.out_path, - '\0', - "output", - "Provide an output filepath.", - seqan3::option_spec::required); + sharg::config{.short_id = '\0', + .long_id = "output", + .description = "Provide an output filepath.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {}}}); parser.add_option(arguments.size, - '\0', - "size", - "Choose the size of the resulting IBF.", - seqan3::option_spec::required, - size_validator{"\\d+\\s{0,1}[k,m,g,t,K,M,G,T]"}); + sharg::config{.short_id = '\0', + .long_id = "size", + .description = "Choose the size of the resulting IBF.", + .required = true, + .validator = size_validator{"\\d+\\s{0,1}[k,m,g,t,K,M,G,T]"}}); parser.add_option(arguments.hash, - '\0', - "hash", - "Choose the number of hashes.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 5}); + sharg::config{.short_id = '\0', + .long_id = "hash", + .description = "Choose the number of hashes.", + .validator = sharg::arithmetic_range_validator{1, 5}}); parser.add_flag(arguments.compressed, - '\0', - "compressed", - "Build a compressed IBF."); + sharg::config{.short_id = '\0', + .long_id = "compressed", + .description = "Build a compressed IBF."}); parser.add_flag(arguments.from_segments, - '\0', - "from-segments", - "Creates IBF from split reference database instead of reference clusters.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "from-segments", + .description = "Creates IBF from split reference database instead of reference clusters."}); parser.add_option(arguments.seg_path, - '\0', - "seg-meta", - "Path to segment metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Path to segment metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); + parser.add_option(arguments.threads, + sharg::config{.short_id = '\0', + .long_id = "threads", + .description = "Choose the number of threads.", + .validator = positive_integer_validator{}}); } -void run_build(seqan3::argument_parser & parser) +void run_build(sharg::parser & parser) { build_arguments arguments{}; init_build_parser(parser, arguments); @@ -114,16 +113,16 @@ void run_build(seqan3::argument_parser & parser) if (parser.is_option_set("window")) { if (arguments.kmer_size > arguments.window_size) - throw seqan3::argument_parser_error{"The k-mer size cannot be bigger than the window size."}; + throw sharg::parser_error{"The k-mer size cannot be bigger than the window size."}; } else arguments.window_size = arguments.shape.size(); try { - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}(arguments.out_path); + sharg::output_file_validator{sharg::output_file_open_options::open_or_create}(arguments.out_path); } - catch (seqan3::argument_parser_error const & ext) + catch (sharg::parser_error const & ext) { std::cerr << "[Error] " << ext.what() << '\n'; std::exit(-1); @@ -151,7 +150,7 @@ void run_build(seqan3::argument_parser & parser) multiplier = 8ull * 1024ull; break; default: - throw seqan3::argument_parser_error{"Use {k, m, g, t} to pass size. E.g., --size 8g."}; + throw sharg::parser_error{"Use {k, m, g, t} to pass size. E.g., --size 8g."}; } size_t size{}; diff --git a/src/argument_parsing/consolidate.cpp b/src/argument_parsing/consolidate.cpp index 55b74e16..b7d9e72a 100644 --- a/src/argument_parsing/consolidate.cpp +++ b/src/argument_parsing/consolidate.cpp @@ -4,30 +4,29 @@ namespace valik::app { -void init_consolidation_parser(seqan3::argument_parser & parser, consolidation_arguments & arguments) +void init_consolidation_parser(sharg::parser & parser, consolidation_arguments & arguments) { init_shared_meta(parser); parser.add_option(arguments.matches_in, - 'i', - "input", - "DREAM Stellar matches.", - seqan3::option_spec::required, - seqan3::input_file_validator{{"gff"}}); + sharg::config{.short_id = 'i', + .long_id = "input", + .description = "DREAM Stellar matches.", + .required = true, + .validator = sharg::input_file_validator{{"gff"}}}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.matches_out, - 'o', - "output", - "Consolidated output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create, {"gff"}}); + sharg::config{.short_id = 'o', + .long_id = "output", + .description = "Consolidated output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"gff"}}}); } -void run_consolidation(seqan3::argument_parser & parser) +void run_consolidation(sharg::parser & parser) { consolidation_arguments arguments{}; init_consolidation_parser(parser, arguments); diff --git a/src/argument_parsing/search.cpp b/src/argument_parsing/search.cpp index 0ca13a9d..5947a83f 100644 --- a/src/argument_parsing/search.cpp +++ b/src/argument_parsing/search.cpp @@ -8,102 +8,159 @@ namespace valik::app { -void init_search_parser(seqan3::argument_parser & parser, search_arguments & arguments) +void init_search_parser(sharg::parser & parser, search_arguments & arguments) { init_shared_meta(parser); - init_shared_options(parser, arguments); parser.add_option(arguments.index_file, - '\0', - "index", - "Provide a valid path to an IBF.", - seqan3::option_spec::required, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "index", + .description = "Provide a valid path to an IBF.", + .required = true, + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.query_file, - '\0', - "query", - "Provide a path to the query file.", - seqan3::option_spec::required, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "query", + .description = "Provide a path to the query file.", + .required = true, + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.out_file, - '\0', - "output", - "Please provide a valid path to the output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create, {"gff"}}); + sharg::config{.short_id = '\0', + .long_id = "output", + .description = "Please provide a valid path to the output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"gff"}}}); parser.add_option(arguments.errors, - '\0', - "error", - "Choose the number of errors.", - seqan3::option_spec::standard, - positive_integer_validator{true}); + sharg::config{.short_id = '\0', + .long_id = "error", + .description = "Choose the number of errors.", + .validator = positive_integer_validator{true}}); parser.add_option(arguments.tau, - '\0', - "tau", - "Used in the dynamic thresholding. The higher tau, the lower the threshold.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "tau", + .description = "Used in the dynamic thresholding. The higher tau, the lower the threshold.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.threshold, - '\0', - "threshold", - "If set, this threshold is used instead of the probabilistic models.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "threshold", + .description = "If set, this threshold is used instead of the probabilistic models.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.p_max, - '\0', - "p_max", - "Used in the dynamic thresholding. The higher p_max, the lower the threshold.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{0, 1}); + sharg::config{.short_id = '\0', + .long_id = "p_max", + .description = "Used in the dynamic thresholding. The higher p_max, the lower the threshold.", + .validator = sharg::arithmetic_range_validator{0, 1}}); parser.add_option(arguments.pattern_size, - '\0', - "pattern", - "Choose the pattern size. Default: half of first query sequence.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "pattern", + .description = "Choose the pattern size. Default: half of first query sequence."}); parser.add_option(arguments.overlap, - '\0', - "overlap", - "Choose how much sequential patterns overlap. Default: pattern size - 1.", - seqan3::option_spec::standard); + sharg::config{.short_id = '\0', + .long_id = "overlap", + .description = "Choose how much sequential patterns overlap. Default: pattern size - 1."}); parser.add_flag(arguments.compressed, - '\0', - "compressed", - "Build a compressed IBF."); + sharg::config{.short_id = '\0', + .long_id = "compressed", + .description = "Build a compressed IBF."}); parser.add_flag(arguments.cache_thresholds, - '\0', - "cache-thresholds", - "Stores the computed thresholds with an unique name next to the index. In the next search call " - "using this option, the stored thresholds are re-used.\n" - "Two files are stored:\n" - "\\fBthreshold_*.bin\\fP: Depends on pattern, window, kmer/shape, errors, and tau.\n" - "\\fBcorrection_*.bin\\fP: Depends on pattern, window, kmer/shape, p_max, and fpr."); + sharg::config{.short_id = '\0', + .long_id = "cache-thresholds", + .description = "Stores the computed thresholds with an unique name next to the index. In the next search call " + "using this option, the stored thresholds are re-used.\n" + "Two files are stored:\n" + "\\fBthreshold_*.bin\\fP: Depends on pattern, window, kmer/shape, errors, and tau.\n" + "\\fBcorrection_*.bin\\fP: Depends on pattern, window, kmer/shape, p_max, and fpr."}); parser.add_flag(arguments.write_time, - '\0', - "time", - "Write timing file.", - seqan3::option_spec::advanced); + sharg::config{.short_id = '\0', + .long_id = "time", + .description = "Write timing file.", + .advanced = true}); parser.add_option(arguments.cart_max_capacity, - '\0', - "cart_max_capacity", - "Number of elements to be stored in a single cart before it is send for processing."); + sharg::config{.short_id = '\0', + .long_id = "cart_max_capacity", + .description = "Number of elements to be stored in a single cart before it is send for processing."}); parser.add_option(arguments.max_queued_carts, - '\0', - "max_queued_carts", - "Maximal number of carts that are full and are waiting to be processed."); + sharg::config{.short_id = '\0', + .long_id = "max_queued_carts", + .description = "Maximal number of carts that are full and are waiting to be processed."}); parser.add_option(arguments.ref_meta_path, - '\0', - "ref-meta", - "Path to reference metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Path to reference metadata file created by split.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.seg_path, - '\0', - "seg-meta", - "Path to segment metadata file created by split.", - seqan3::option_spec::standard, - seqan3::input_file_validator{}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Path to segment metadata file created by split.", + .validator = sharg::input_file_validator{}}); + parser.add_flag(arguments.shared_memory, + sharg::config{.short_id = '\0', + .long_id = "shared-memory", + .description = "Launch Stellar instances on a single machine with shared memory."}); + parser.add_option(arguments.threads, + sharg::config{.short_id = '\0', + .long_id = "threads", + .description = "Choose the number of threads.", + .validator = positive_integer_validator{}}); + + ///////////////////////////////////////// + // Stellar options + ///////////////////////////////////////// + /* + // Filtering options + parser.add_option(options.qGram, + sharg::config{.short_id = 'k', + .long_id = "kmer", + .description = "Length of the q-grams.", + .validator = sharg::arithmetic_range_validator{1, 32}}); + parser.add_option(options.maxRepeatPeriod, + sharg::config{.short_id = '\0', + .long_id = "repeatPeriod", + .description = "Maximal period of low complexity repeats to be filtered.", + .validator = sharg::arithmetic_range_validator{1, 32}}); + parser.add_option(options.minRepeatLength, + sharg::config{.short_id = '\0', + .long_id = "repeatLength", + .description = "Minimal length of low complexity repeats to be filtered.", + .validator = sharg::arithmetic_range_validator{1u, std::numeric_limits::max()}}); + parser.add_option(options.qgramAbundanceCut, + sharg::config{.short_id = 'c', + .long_id = "abundanceCut", + .description = "k-mer overabundance cut ratio.", + .validator = float_in_range_validator{0, 1}}); + + // Verification options + parser.add_option(options.xDrop, + sharg::config{.short_id = 'x', + .long_id = "xDrop", + .description = "Maximal x-drop for extension."}); + parser.add_option(options.strVerificationMethod, + sharg::config{.short_id = '\0', + .long_id = "verification", + .description = "Verification strategy: exact or bestLocal or bandedGlobal.", + .validator = sharg::value_list_validator{"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}}); + parser.add_option(options.disableThresh, + sharg::config{.short_id = '\0', + .long_id = "disableThresh", + .description = "Maximal number of verified matches before disabling verification for one query sequence (default infinity).", + .validator = sharg::arithmetic_range_validator{1, 10000}}); + parser.add_option(options.numMatches, + sharg::config{.short_id = 'n', + .long_id = "numMatches", + .description = "Maximal number of kept matches per query and database. If STELLAR finds more matches, only the longest ones are kept."}); + parser.add_option(options.compactThresh, + sharg::config{.short_id = 's', + .long_id = "sortThresh", + .description = "Number of matches triggering removal of duplicates. Choose a smaller value for saving space."}); + + parser.add_option(options.disabledQueriesFile, + sharg::config{.short_id = '\0', + .long_id = "disabledQueriesFile", + .description = "Name of output file for disabled query sequences.", + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create, {"fa", "fasta"}}}); + */ } -void run_search(seqan3::argument_parser & parser) +void run_search(sharg::parser & parser) { search_arguments arguments{}; @@ -115,7 +172,7 @@ void run_search(seqan3::argument_parser & parser) // Various checks. // ========================================== - seqan3::input_file_validator>{}(arguments.query_file); + sharg::input_file_validator{}(arguments.query_file); arguments.treshold_was_set = parser.is_option_set("threshold"); // ========================================== @@ -141,7 +198,7 @@ void run_search(seqan3::argument_parser & parser) if (parser.is_option_set("pattern")) { if (arguments.pattern_size < arguments.window_size) - throw seqan3::argument_parser_error{"The minimiser window cannot be bigger than the pattern."}; + throw sharg::validation_error{"The minimiser window cannot be bigger than the pattern."}; } else @@ -164,7 +221,7 @@ void run_search(seqan3::argument_parser & parser) if (parser.is_option_set("overlap")) { if (arguments.overlap >= arguments.pattern_size) - throw seqan3::argument_parser_error{"The overlap size has to be smaller than the pattern size."}; + throw sharg::validation_error{"The overlap size has to be smaller than the pattern size."}; } else arguments.overlap = arguments.pattern_size - 1; diff --git a/src/argument_parsing/shared.cpp b/src/argument_parsing/shared.cpp index e8230365..fc12b48b 100644 --- a/src/argument_parsing/shared.cpp +++ b/src/argument_parsing/shared.cpp @@ -3,7 +3,7 @@ namespace valik::app { -void init_shared_meta(seqan3::argument_parser & parser) +void init_shared_meta(sharg::parser & parser) { parser.info.app_name = "valik"; parser.info.author = "Evelin Aasna"; @@ -43,13 +43,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.)"; parser.info.version = "1.0"; } -void try_parsing(seqan3::argument_parser & parser) +void try_parsing(sharg::parser & parser) { try { parser.parse(); } - catch (seqan3::argument_parser_error const & ext) + catch (sharg::parser_error const & ext) { std::cerr << "[Error] " << ext.what() << '\n'; std::exit(-1); diff --git a/src/argument_parsing/split.cpp b/src/argument_parsing/split.cpp index 5e563074..11beb69e 100644 --- a/src/argument_parsing/split.cpp +++ b/src/argument_parsing/split.cpp @@ -6,43 +6,41 @@ namespace valik::app { -void init_split_parser(seqan3::argument_parser & parser, split_arguments & arguments) +void init_split_parser(sharg::parser & parser, split_arguments & arguments) { init_shared_meta(parser); parser.add_positional_option(arguments.ref_file, - "File containing reference sequences.", - seqan3::input_file_validator{}); + sharg::config{.description = "File containing reference sequences.", + .validator = sharg::input_file_validator{}}); parser.add_option(arguments.ref_out, - '\0', - "ref-meta", - "Please provide a valid path to the reference metadata output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}); + sharg::config{.short_id = '\0', + .long_id = "ref-meta", + .description = "Please provide a valid path to the reference metadata output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create}}); parser.add_option(arguments.seg_out, - '\0', - "seg-meta", - "Please provide a valid path to the segment metadata output.", - seqan3::option_spec::required, - seqan3::output_file_validator{seqan3::output_file_open_options::open_or_create}); + sharg::config{.short_id = '\0', + .long_id = "seg-meta", + .description = "Please provide a valid path to the segment metadata output.", + .required = true, + .validator = sharg::output_file_validator{sharg::output_file_open_options::open_or_create}}); parser.add_option(arguments.overlap, - '\0', - "overlap", - "Choose how much consecutive segments overlap.", - seqan3::option_spec::standard, - positive_integer_validator{true}); + sharg::config{.short_id = '\0', + .long_id = "overlap", + .description = "Choose how much consecutive segments overlap.", + .validator = positive_integer_validator{true}}); parser.add_option(arguments.bins, - '\0', - "bins", - "Number of bins in the IBF. Multiples of 64 lead to better performance.", - seqan3::option_spec::standard, - seqan3::arithmetic_range_validator{1, 29952}); + sharg::config{.short_id = '\0', + .long_id = "bins", + .description = "Number of bins in the IBF. Multiples of 64 lead to better performance.", + .validator = sharg::arithmetic_range_validator{1, 29952}}); parser.add_flag(arguments.write_seg, - '\0', - "write-out", - "Write segment sequences to disk."); + sharg::config{.short_id = '\0', + .long_id = "write-out", + .description = "Write segment sequences to disk."}); } -void run_split(seqan3::argument_parser & parser) +void run_split(sharg::parser & parser) { split_arguments arguments{}; init_split_parser(parser, arguments); diff --git a/src/argument_parsing/top_level.cpp b/src/argument_parsing/top_level.cpp index 7028ee67..8ec64d69 100644 --- a/src/argument_parsing/top_level.cpp +++ b/src/argument_parsing/top_level.cpp @@ -3,7 +3,7 @@ namespace valik::app { -void init_top_level_parser(seqan3::argument_parser & parser) +void init_top_level_parser(sharg::parser & parser) { init_shared_meta(parser); parser.info.description.emplace_back("Binning Directories are a datastruture that can be used in various ways. " diff --git a/src/valik_main.cpp b/src/valik_main.cpp index 1d5e047c..97b68d5f 100644 --- a/src/valik_main.cpp +++ b/src/valik_main.cpp @@ -13,12 +13,12 @@ int main(int argc, char ** argv) { try { - seqan3::argument_parser top_level_parser{"valik", argc, argv, seqan3::update_notifications::on, {"split", "build", "search", "consolidate"}}; + sharg::parser top_level_parser{"valik", argc, argv, sharg::update_notifications::off, {"split", "build", "search", "consolidate"}}; valik::app::init_top_level_parser(top_level_parser); valik::app::try_parsing(top_level_parser); - seqan3::argument_parser & sub_parser = top_level_parser.get_sub_parser(); + sharg::parser & sub_parser = top_level_parser.get_sub_parser(); if (sub_parser.info.app_name == std::string_view{"valik-split"}) valik::app::run_split(sub_parser); if (sub_parser.info.app_name == std::string_view{"valik-build"}) diff --git a/src/valik_search.cpp b/src/valik_search.cpp index f824a9b5..8261f8e4 100644 --- a/src/valik_search.cpp +++ b/src/valik_search.cpp @@ -1,7 +1,6 @@ #include -#include -#include +#include #include #include #include @@ -14,35 +13,17 @@ #include +#include +#include +#include +#include +#include +#include +#include namespace valik::app { -/* Creates a temporary folder in the temporary path of the OS - * - * \param name: a name with 'XXXXXX' at the end, e.g.: valik/call_XXXXXX - * \return returns the name with the 'XXXXXX' replaced and the directory created - * - * throws if any errors occures - */ -static std::filesystem::path create_temporary_path(std::filesystem::path name) { - if (!name.is_relative()) { - throw std::runtime_error("Must be given a relative file"); - } - auto path = std::filesystem::temp_directory_path() / name; - auto path_str = path.native(); - create_directories(path.parent_path()); - auto str = std::vector(path_str.size()+1, '\0'); // Must include an extra character to include a 0 - std::copy_n(path_str.data(), path_str.size(), str.data()); - auto ptr = mkdtemp(str.data()); - if (!ptr) { - throw std::runtime_error("Could not create temporary folder: " + path_str); - } - return str.data(); -} - - - //----------------------------- // // Setup IBF and launch multithreaded search. @@ -72,23 +53,7 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim raptor::threshold::threshold const thresholder{arguments.make_threshold_parameters()}; - // the location of bin-query fasta files can be overwritten with an environment variable - // the $VALIK_TMP directory has to exist and write permission must be granted - std::filesystem::path tmp_path; - if (auto ptr = std::getenv("VALIK_TMP"); ptr != nullptr) - tmp_path = std::string(ptr); - else - tmp_path = create_temporary_path("valik/stellar_call_XXXXXX"); - - std::string stellar_exec = "stellar"; - if (auto ptr = std::getenv("VALIK_STELLAR"); ptr != nullptr) - stellar_exec = std::string(ptr); - - std::string merge_exec = "cat"; - if (auto ptr = std::getenv("VALIK_MERGE"); ptr != nullptr) - merge_exec = std::string(ptr); - - + env_var_pack var_pack{}; sync_out synced_out{arguments.out_file}; auto queue = cart_queue{index.ibf().bin_count(), arguments.cart_max_capacity, arguments.max_queued_carts}; @@ -110,30 +75,73 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim std::vector output_files; std::stringstream text_out; std::vector timeStatistics; + std::vector stellarTimes; }; std::vector localData(arguments.threads); + using TAlphabet = seqan2::Dna; + using TSequence = seqan2::String; + + // negative (reverse complemented) database strand + bool const reverse = true /*threadOptions.reverse && threadOptions.alphabet != "protein" && threadOptions.alphabet != "char" */; + seqan2::StringSet databases; + seqan2::StringSet reverseDatabases; + seqan2::StringSet databaseIDs; + using TSize = decltype(length(databases[0])); + TSize refLen; + + if (arguments.shared_memory) + { + stellar::stellar_app_runtime stellarTime{}; + + for (auto bin_paths : index.bin_path()) + { + for (auto path : bin_paths) + { + bool const databasesSuccess = stellarTime.input_databases_time.measure_time([&]() + { + std::cout << "Launching stellar search on a shared memory machine...\n"; + return stellar::_importAllSequences(path.c_str(), "database", databases, databaseIDs, refLen, std::cout, std::cerr); + }); + if (!databasesSuccess) + return false; + } + } + + if (reverse) + { + for (auto database : databases) + { + reverseComplement(database); + seqan2::appendValue(reverseDatabases, database, seqan2::Generous()); + } + } + } + stellar::DatabaseIDMap databaseIDMap{databases, databaseIDs}; + stellar::DatabaseIDMap reverseDatabaseIDMap{reverseDatabases, databaseIDs}; + + auto consumerThreads = std::vector{}; for (size_t threadNbr = 0; threadNbr < arguments.threads; ++threadNbr) { consumerThreads.emplace_back( [&, threadNbr]() { auto& ld = localData[threadNbr]; - // this will block until producer threads have added carts to queue for (auto next = queue.dequeue(); next; next = queue.dequeue()) { auto & [bin_id, records] = *next; std::unique_lock g(mutex); - std::filesystem::path path = tmp_path / std::string("query_" + std::to_string(bin_id) + "_" + std::to_string(bin_count[bin_id]++) + ".fasta"); + std::filesystem::path cart_queries_path = var_pack.tmp_path / std::string("query_" + std::to_string(bin_id) + + "_" + std::to_string(bin_count[bin_id]++) + ".fasta"); g.unlock(); - ld.output_files.push_back(path.string() + ".gff"); + ld.output_files.push_back(cart_queries_path.string() + ".gff"); { - seqan3::sequence_file_output fout{path, fields{}}; + seqan3::sequence_file_output fout{cart_queries_path, fields{}}; for (auto & record : records) { @@ -142,57 +150,255 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim } } - std::vector process_args{}; - if (arguments.write_time) + if (arguments.shared_memory) { - std::filesystem::path time_path = path.string() + std::string(".gff.time"); - process_args.insert(process_args.end(), {"/usr/bin/time", "-o", std::string(time_path), "-f", "\"%e\t%M\t%x\t%C\""}); - } - process_args.insert(process_args.end(), {stellar_exec, "--version-check", "0"}); + stellar::StellarOptions threadOptions{}; + stellar::stellar_app_runtime stellarThreadTime{}; + using TDatabaseSegment = stellar::StellarDatabaseSegment; + + // import query sequences + seqan2::StringSet queries; + seqan2::StringSet queryIDs; + + using TSize = decltype(length(queries[0])); + TSize queryLen{0}; // does not get populated currently + //!TODO: split query sequence + bool const queriesSuccess = stellarThreadTime.input_queries_time.measure_time([&]() + { + return stellar::_importAllSequences(cart_queries_path.c_str(), "query", queries, queryIDs, queryLen, ld.text_out, ld.text_out); + }); + if (!queriesSuccess) + { + std::cerr << "Error importing queries\n"; + error_triggered = true; + } - if (segments && ref_meta) - { - // search segments of a single reference file - auto ref_len = ref_meta->total_len; - auto seg = segments->segment_from_bin(bin_id); - process_args.insert(process_args.end(), {index.bin_path()[0][0], std::string(path), - "--referenceLength", std::to_string(ref_len), - "--sequenceOfInterest", std::to_string(seg.ref_ind), - "--segmentBegin", std::to_string(seg.start), - "--segmentEnd", std::to_string(seg.start + seg.len)}); + threadOptions.alphabet = "dna"; // Possible values: dna, rna, protein, char + threadOptions.queryFile = cart_queries_path.string(); + threadOptions.prefilteredSearch = true; + threadOptions.referenceLength = refLen; + if (segments && ref_meta) + { + threadOptions.searchSegment = true; + auto seg = segments->segment_from_bin(bin_id); + threadOptions.binSequences.emplace_back(seg.ref_ind); + threadOptions.segmentBegin = seg.start; + threadOptions.segmentEnd = seg.start + seg.len; + } + else + { + if (index.bin_path().size() < (size_t) bin_id) { + throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + } + threadOptions.binSequences.push_back(bin_id); //!TODO: what if mutliple sequence files per bin + } + threadOptions.numEpsilon = er_rate; + threadOptions.epsilon = stellar::utils::fraction::from_double(threadOptions.numEpsilon).limit_denominator(); + threadOptions.minLength = arguments.pattern_size; + threadOptions.outputFile = cart_queries_path.string() + ".gff"; + stellar::_writeFileNames(threadOptions, ld.text_out); + stellar::_writeSpecifiedParams(threadOptions, ld.text_out); + stellar::_writeCalculatedParams(threadOptions, ld.text_out); // calculate qGram + ld.text_out << std::endl; + stellar::_writeMoreCalculatedParams(threadOptions, threadOptions.referenceLength, queries, ld.text_out); + + + auto current_time = stellarThreadTime.swift_index_construction_time.now(); + stellar::StellarIndex stellarIndex{queries, threadOptions}; + stellar::StellarSwiftPattern swiftPattern = stellarIndex.createSwiftPattern(); + + // Construct index of the queries + ld.text_out << "Constructing index..." << '\n'; + stellarIndex.construct(); + ld.text_out << std::endl; + stellarThreadTime.swift_index_construction_time.manual_timing(current_time); + + //!TODO: process disabled queries + std::vector disabledQueryIDs{}; + + stellar::StellarOutputStatistics outputStatistics{}; + if (threadOptions.forward) + { + auto databaseSegment = stellar::_getDREAMDatabaseSegment + (databases[threadOptions.binSequences[0]], threadOptions); + stellarThreadTime.forward_strand_stellar_time.measure_time([&]() + { + size_t const databaseRecordID = databaseIDMap.recordID(databaseSegment); + seqan2::CharString const & databaseID = databaseIDMap.databaseID(databaseRecordID); + // container for eps-matches + seqan2::StringSet const, + seqan2::CharString> > > forwardMatches; + seqan2::resize(forwardMatches, length(queries)); + + constexpr bool databaseStrand = true; + stellar::QueryIDMap queryIDMap{queries}; + + stellar::StellarComputeStatistics statistics = stellar::StellarLauncher::search_and_verify + ( + databaseSegment, + databaseID, + queryIDMap, + databaseStrand, + threadOptions, + swiftPattern, + stellarThreadTime.forward_strand_stellar_time.prefiltered_stellar_time, + forwardMatches + ); + + ld.text_out << std::endl; // swift filter output is on same line + stellar::_printDatabaseIdAndStellarKernelStatistics(threadOptions.verbose, databaseStrand, databaseID, statistics, ld.text_out); + + stellarThreadTime.forward_strand_stellar_time.post_process_eps_matches_time.measure_time([&]() + { + // forwardMatches is an in-out parameter + // this is the match consolidation + stellar::_postproccessQueryMatches(databaseStrand, threadOptions.referenceLength, threadOptions, + forwardMatches, disabledQueryIDs); + }); // measure_time + + // open output files + std::ofstream outputFile(threadOptions.outputFile.c_str(), ::std::ios_base::out); + if (!outputFile.is_open()) + { + std::cerr << "Could not open output file." << std::endl; + error_triggered = true; + } + stellarThreadTime.forward_strand_stellar_time.output_eps_matches_time.measure_time([&]() + { + // output forwardMatches on positive database strand + stellar::_writeAllQueryMatchesToFile(forwardMatches, queryIDs, databaseStrand, "gff", outputFile); + }); // measure_time + + outputStatistics = stellar::_computeOutputStatistics(forwardMatches); + }); // measure_time + } + + + if (reverse) + { + TDatabaseSegment databaseSegment{}; + stellarThreadTime.reverse_complement_database_time.measure_time([&]() + { + databaseSegment = _getDREAMDatabaseSegment + (reverseDatabases[threadOptions.binSequences[0]], threadOptions, reverse); + }); // measure_time + + stellarThreadTime.reverse_strand_stellar_time.measure_time([&]() + { + size_t const databaseRecordID = reverseDatabaseIDMap.recordID(databaseSegment); + seqan2::CharString const & databaseID = reverseDatabaseIDMap.databaseID(databaseRecordID); + // container for eps-matches + seqan2::StringSet const, + seqan2::CharString> > > reverseMatches; + seqan2::resize(reverseMatches, length(queries)); + + constexpr bool databaseStrand = false; + stellar::QueryIDMap queryIDMap{queries}; + + stellar::StellarComputeStatistics statistics = stellar::StellarLauncher::search_and_verify + ( + databaseSegment, + databaseID, + queryIDMap, + databaseStrand, + threadOptions, + swiftPattern, + stellarThreadTime.reverse_strand_stellar_time.prefiltered_stellar_time, + reverseMatches + ); + + ld.text_out << std::endl; // swift filter output is on same line + stellar::_printDatabaseIdAndStellarKernelStatistics(threadOptions.verbose, databaseStrand, databaseID, statistics, ld.text_out); + + stellarThreadTime.reverse_strand_stellar_time.post_process_eps_matches_time.measure_time([&]() + { + // reverseMatches is an in-out parameter + // this is the match consolidation + stellar::_postproccessQueryMatches(databaseStrand, threadOptions.referenceLength, threadOptions, + reverseMatches, disabledQueryIDs); + }); // measure_time + + // open output files + std::ofstream outputFile(threadOptions.outputFile.c_str(), ::std::ios_base::app); + if (!outputFile.is_open()) + { + std::cerr << "Could not open output file." << std::endl; + error_triggered = true; + } + stellarThreadTime.reverse_strand_stellar_time.output_eps_matches_time.measure_time([&]() + { + // output reverseMatches on negative database strand + stellar::_writeAllQueryMatchesToFile(reverseMatches, queryIDs, databaseStrand, "gff", outputFile); + }); // measure_time + + outputStatistics.mergeIn(stellar::_computeOutputStatistics(reverseMatches)); + }); // measure_time + } + + stellar::_writeOutputStatistics(outputStatistics, threadOptions.verbose, false /* disabledQueriesFile.is_open() */, ld.text_out); + + ld.timeStatistics.emplace_back(stellarThreadTime.milliseconds()); + if (arguments.write_time) + { + std::filesystem::path time_path = cart_queries_path.string() + std::string(".gff.time"); + + stellar::_print_stellar_app_time(stellarThreadTime, ld.text_out); + } } else { - // search a reference database of bin sequence files - if (index.bin_path().size() < bin_id) { - throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + std::vector process_args{}; + process_args.insert(process_args.end(), {var_pack.stellar_exec, "--version-check", "0"}); + + if (segments && ref_meta) + { + // search segments of a single reference file + auto ref_len = ref_meta->total_len; + auto seg = segments->segment_from_bin(bin_id); + process_args.insert(process_args.end(), {index.bin_path()[0][0], std::string(cart_queries_path), + "--referenceLength", std::to_string(ref_len), + "--sequenceOfInterest", std::to_string(seg.ref_ind), + "--segmentBegin", std::to_string(seg.start), + "--segmentEnd", std::to_string(seg.start + seg.len)}); + } + else + { + // search a reference database of bin sequence files + if (index.bin_path().size() < (size_t) bin_id) { + throw std::runtime_error("Could not find reference file with index " + std::to_string(bin_id) + + ". Did you forget to provide metadata to search segments in a single reference file instead?"); + } + process_args.insert(process_args.end(), {index.bin_path()[bin_id][0], std::string(cart_queries_path)}); } - process_args.insert(process_args.end(), {index.bin_path()[bin_id][0], std::string(path)}); - } - process_args.insert(process_args.end(), {"-e", std::to_string(er_rate), - "-l", std::to_string(arguments.pattern_size), - "-o", std::string(path) + ".gff"}); + if (arguments.write_time) + process_args.insert(process_args.end(), "--time"); - auto start = std::chrono::high_resolution_clock::now(); - external_process process(process_args); - auto end = std::chrono::high_resolution_clock::now(); + process_args.insert(process_args.end(), {"-e", std::to_string(er_rate), + "-l", std::to_string(arguments.pattern_size), + "-o", std::string(cart_queries_path) + ".gff"}); - ld.timeStatistics.emplace_back(0.0 + std::chrono::duration_cast>(end - start).count()); + auto start = std::chrono::high_resolution_clock::now(); + external_process process(process_args); + auto end = std::chrono::high_resolution_clock::now(); - ld.text_out << process.cout(); - ld.text_out << process.cerr(); + ld.timeStatistics.emplace_back(0.0 + std::chrono::duration_cast>(end - start).count()); - if (process.status() != 0) { - std::unique_lock g(mutex); // make sure that our output is synchronized - std::cerr << "error running VALIK_STELLAR\n"; - std::cerr << "call:"; - for (auto args : process_args) { - std::cerr << " " << args; + ld.text_out << process.cout(); + ld.text_out << process.cerr(); + + if (process.status() != 0) { + std::unique_lock g(mutex); // make sure that our output is synchronized + std::cerr << "error running VALIK_STELLAR\n"; + std::cerr << "call:"; + for (auto args : process_args) { + std::cerr << " " << args; + } + std::cerr << '\n'; + std::cerr << process.cerr() << '\n'; + error_triggered = true; } - std::cerr << '\n'; - std::cerr << process.cerr() << '\n'; - error_triggered = true; } } }); @@ -228,9 +434,20 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim text_out << ld.text_out.str(); } - std::vector merge_process_args{merge_exec}; - for (auto & path : output_files) - merge_process_args.push_back(path); + std::vector merge_process_args; + if (output_files.size() > 0) + { + merge_process_args.push_back(var_pack.merge_exec); + for (auto & path : output_files) + merge_process_args.push_back(path); + } + else + { + //!WORKAROUND: merge hangs if no valik matches found + merge_process_args.push_back("echo"); + merge_process_args.push_back("-n"); + } + external_process merge(merge_process_args); auto check_external_process_success = [&](std::vector const & proc_args, external_process const & proc) @@ -249,18 +466,6 @@ bool run_program(search_arguments const &arguments, search_time_statistics & tim return error_triggered; }; - if (arguments.write_time) - { - std::vector merge_time_files{"cat"}; - for (auto & path : output_files) - merge_time_files.push_back(path + ".time"); - external_process merge_time(merge_time_files); - error_triggered = check_external_process_success(merge_time_files, merge_time); - - std::ofstream time_out(arguments.out_file.string() + std::string(".time")); - time_out << merge_time.cout(); - } - error_triggered = check_external_process_success(merge_process_args, merge); std::ofstream matches_out(arguments.out_file); @@ -279,7 +484,7 @@ void valik_search(search_arguments const & arguments) failed = run_program(arguments, time_statistics); if (arguments.write_time) - write_time_statistics(time_statistics, arguments); + write_time_statistics(time_statistics, arguments.out_file.string() + ".time"); if (failed) { throw std::runtime_error("valik_search failed. Run didn't complete correctly."); diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt index b75795b5..1159508f 100644 --- a/test/cli/CMakeLists.txt +++ b/test/cli/CMakeLists.txt @@ -77,3 +77,23 @@ target_use_datasources (valik_test FILES 8bins50overlap_dream_all.gff) target_use_datasources (valik_test FILES 8bins50overlap_reference_metadata.tsv) target_use_datasources (valik_test FILES multi_seq_ref.fasta) target_use_datasources (valik_test FILES query_e0.06.fasta) + +add_cli_test (dream_test.cpp) +target_use_datasources (dream_test FILES 16bins13window1error.gff) +target_use_datasources (dream_test FILES 16bins13window1error.gff.out) +target_use_datasources (dream_test FILES 16bins13window.ibf) +target_use_datasources (dream_test FILES 16bins15window1error.gff) +target_use_datasources (dream_test FILES 16bins15window1error.gff.out) +target_use_datasources (dream_test FILES 16bins15window.ibf) +target_use_datasources (dream_test FILES 4bins13window1error.gff) +target_use_datasources (dream_test FILES 4bins13window1error.gff.out) +target_use_datasources (dream_test FILES 4bins13window.ibf) +target_use_datasources (dream_test FILES 4bins15window1error.gff) +target_use_datasources (dream_test FILES 4bins15window1error.gff.out) +target_use_datasources (dream_test FILES 4bins15window.ibf) +target_use_datasources (dream_test FILES dummy_reads.fastq) +target_use_datasources (dream_test FILES query.fastq) +target_use_datasources (dream_test FILES ref.fasta) +target_use_datasources (dream_test FILES ref_meta.txt) +target_use_datasources (dream_test FILES seg_meta150overlap16bins.txt) +target_use_datasources (dream_test FILES seg_meta150overlap4bins.txt) diff --git a/test/cli/cli_test.hpp b/test/cli/cli_test.hpp index 22eb06b8..70a7f4f5 100644 --- a/test/cli/cli_test.hpp +++ b/test/cli/cli_test.hpp @@ -193,6 +193,20 @@ struct valik_base : public cli_test return cli_test::data(name); } + static std::filesystem::path search_result_path(size_t const number_of_bins, size_t const window_size, + size_t const number_of_errors) noexcept + { + std::string name{}; + name += std::to_string(number_of_bins); + name += "bins"; + name += std::to_string(window_size); + name += "window"; + name += std::to_string(number_of_errors); + name += "error"; + name += ".gff"; + return cli_test::data(name); + } + static std::string string_from_file(std::filesystem::path const & path, std::ios_base::openmode const mode = std::ios_base::in) { std::ifstream file_stream(path, mode); @@ -518,17 +532,25 @@ struct valik_base : public cli_test return cli_test::data(name); } - static void compare_consolidation_out(std::vector const & expected, - std::vector const & actual) + static void compare_gff_out(std::vector const & expected, + std::vector const & actual) { EXPECT_EQ(expected.size(), actual.size()); + size_t not_actually_found{0}; for (auto & match : expected) { auto it = std::find(actual.begin(), actual.end(), match); - EXPECT_TRUE(it != actual.end()); + if (it == actual.end()) + { + not_actually_found++; + seqan3::debug_stream << match.to_string(); + } + // EXPECT_EQ(match.percid, (*it).percid); // EXPECT_EQ(match.attributes, (*it).attributes); } + + EXPECT_EQ(not_actually_found, 0); } }; @@ -539,4 +561,5 @@ struct valik_search_clusters : public valik_base, public testing::WithParamInter size_t, size_t>> {}; struct valik_search_segments : public valik_base, public testing::WithParamInterface> {}; +struct dream_search : public valik_base, public testing::WithParamInterface> {}; struct valik_consolidate : public valik_base, public testing::WithParamInterface> {}; diff --git a/test/cli/dream_test.cpp b/test/cli/dream_test.cpp new file mode 100644 index 00000000..efd7a0a0 --- /dev/null +++ b/test/cli/dream_test.cpp @@ -0,0 +1,100 @@ +#include +#include +#include // range comparisons +#include // strings +#include // vectors + +#include "cli_test.hpp" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////// DREAM shared memory ////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +TEST_P(dream_search, shared_mem) +{ + auto const [number_of_bins, window_size, number_of_errors] = GetParam(); + + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + std::filesystem::path ref_meta_path = data("ref_meta.txt"); + valik::reference_metadata reference(ref_meta_path, false); + std::filesystem::path seg_meta_path = data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt"); + std::filesystem::path index_path = ibf_path(number_of_bins, window_size); + + cli_test_result const build = execute_app("valik", "build", + data("ref.fasta"), + "--window", std::to_string(window_size), + "--kmer 13", + "--size 32k", + "--from-segments", + "--ref-meta", ref_meta_path, + "--seg-meta", seg_meta_path, + "--output ", index_path); + + cli_test_result const result = execute_app("valik", "search", + "--shared-memory", + "--output search.gff", + "--pattern 50", + "--overlap 49", + "--error ", std::to_string(number_of_errors), + "--index ", index_path, + "--query ", data("query.fastq"), + "--ref-meta", ref_meta_path, + "--seg-meta", seg_meta_path); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"}); + EXPECT_EQ(result.err, std::string{}); + + auto distributed = valik::read_stellar_output(search_result_path(number_of_bins, window_size, number_of_errors), reference, std::ios::binary); + auto local = valik::read_stellar_output("search.gff", reference); + + compare_gff_out(distributed, local); +} + +INSTANTIATE_TEST_SUITE_P(shared_memory_dream_suite, + dream_search, + testing::Combine(testing::Values(4, 16), testing::Values(13, 15), testing::Values(1)), + [] (testing::TestParamInfo const & info) + { + std::string name = std::to_string(std::get<0>(info.param)) + "_bins_" + + std::to_string(std::get<1>(info.param)) + "_window_" + + std::to_string(std::get<2>(info.param)) + "_error"; + return name; + }); + +TEST_F(dream_search, no_matches) +{ + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + size_t number_of_bins = 4; + size_t window_size = 15; + cli_test_result const build = execute_app("valik", "build", + data("ref.fasta"), + "--window ", std::to_string(window_size), + "--kmer 13", + "--size 32k", + "--from-segments", + "--ref-meta", data("ref_meta.txt"), + "--seg-meta", data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt"), + "--output ", ibf_path(number_of_bins, window_size)); + + cli_test_result const result = execute_app("valik", "search", + "--shared-memory", + "--output search.gff", + "--pattern 50", + "--overlap 49", + "--error 1", + "--index ", ibf_path(number_of_bins, window_size), + "--query ", data("dummy_reads.fastq"), + "--ref-meta", data("ref_meta.txt"), + "--seg-meta", data("seg_meta150overlap" + std::to_string(number_of_bins) + "bins.txt")); + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 3 database sequences.\n"}); + EXPECT_EQ(result.err, std::string{}); + + auto actual = string_list_from_file("search.gff"); + + EXPECT_EQ(0, actual.size()); +} diff --git a/test/cli/valik_options_test.cpp b/test/cli/valik_options_test.cpp index 5250979e..0a094517 100644 --- a/test/cli/valik_options_test.cpp +++ b/test/cli/valik_options_test.cpp @@ -76,8 +76,8 @@ TEST_F(argparse, no_subparser) cli_test_result const result = execute_app("valik", "foo"); std::string const expected { - "[Error] You either forgot or misspelled the subcommand! Please specify which sub-program you want to use: one " - "of [split,build,search,consolidate]. Use -h/--help for more information.\n" + "[Error] You misspelled the subcommand! Please specify which sub-program you want to use: one of [split, build, search, consolidate]. " + "Use -h/--help for more information.\n" }; EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); @@ -89,8 +89,8 @@ TEST_F(argparse, unknown_option) cli_test_result const result = execute_app("valik", "-v"); std::string const expected { - "[Error] You either forgot or misspelled the subcommand! Please specify which sub-program you want to use: one " - "of [split,build,search,consolidate]. Use -h/--help for more information.\n" + "[Error] You misspelled the subcommand! Please specify which sub-program you want to use: one of [split, build, search, consolidate]. " + "Use -h/--help for more information.\n" }; EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); @@ -169,7 +169,7 @@ TEST_F(argparse_build, output_wrong) tmp_bin_list_file.file_path); EXPECT_NE(result.exit_code, 0); EXPECT_EQ(result.out, std::string{}); - EXPECT_EQ(result.err, std::string{"[Error] Cannot write \"foo/out.ibf\"!\n"}); + EXPECT_EQ(result.err, std::string{"[Error] Validation failed for option --output: Cannot write \"foo/out.ibf\"!\n"}); } TEST_F(argparse_build, size_missing) diff --git a/test/cli/valik_test.cpp b/test/cli/valik_test.cpp index aa2ee2b1..689f370e 100644 --- a/test/cli/valik_test.cpp +++ b/test/cli/valik_test.cpp @@ -103,6 +103,7 @@ TEST_P(valik_build_segments, build_from_segments) std::string ref_meta_path = cli_test::data("reference_metadata.txt"); std::string seg_meta_path = cli_test::data(std::to_string(overlap) + "overlap" + std::to_string(number_of_bins) + "bins.txt"); + //!TODO: the paths in the index are not data(path.fasta) so the file can't be opened by stellar (only a testing issue) cli_test_result const result = execute_app("valik", "build", "--kmer 13", "--window ", std::to_string(window_size), @@ -253,7 +254,7 @@ TEST_P(valik_consolidate, consolidation) auto expected = valik::read_stellar_output(stellar_gold_path(segment_overlap), reference, std::ios::binary); auto actual = valik::read_stellar_output("consolidated.gff", reference); - compare_consolidation_out(expected, actual); + compare_gff_out(expected, actual); } INSTANTIATE_TEST_SUITE_P(consolidation_suite, diff --git a/test/data/consolidate/16bins50overlap_dream_all.gff.out b/test/data/consolidate/16bins50overlap_dream_all.gff.out deleted file mode 100644 index 5ad4a575..00000000 --- a/test/data/consolidate/16bins50overlap_dream_all.gff.out +++ /dev/null @@ -1,672 +0,0 @@ -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_15_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_15_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_14_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_14_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_13_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_13_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_12_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_12_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 0 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_11_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_11_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_10_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_10_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_9_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_9_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_8_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_8_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_7_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_7_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_6_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_6_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_5_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_5_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_4_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_4_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_3_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_3_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_2_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_2_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_1_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_1_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_pcxc7t/query_0_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_pcxc7t/query_0_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - diff --git a/test/data/consolidate/8bins50overlap_dream_all.gff.out b/test/data/consolidate/8bins50overlap_dream_all.gff.out deleted file mode 100644 index aa899a4b..00000000 --- a/test/data/consolidate/8bins50overlap_dream_all.gff.out +++ /dev/null @@ -1,336 +0,0 @@ -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_7_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_7_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 3. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 3 - 3, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_6_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_6_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_5_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_5_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 3 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_4_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_4_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 2. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 2 - 2, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_3_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_3_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 1 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_2_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_2_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 2 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_1_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_1_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - -I/O options: - database file : multi_seq_ref.fasta - query file : /tmp/valik/stellar_call_iIVcaz/query_0_0.fasta - alphabet : dna5 - output file : /tmp/valik/stellar_call_iIVcaz/query_0_0.fasta.gff - output format : gff - -User specified parameters: - minimal match length : 50 - maximal error rate (epsilon) : 0.06 = (3/50) - maximal x-drop : 5 - search forward strand : yes - search reverse complement : yes - - verification strategy : exact - maximal number of matches : 50 - duplicate removal every : 500 - threads : 1 - -Calculated parameters: - k-mer length : 12 - s^min : 12 - threshold : 3 - distance cut : 50 - delta : 16 - overlap : 3 - -Loaded 1 query sequence. -Loaded sequence 1. - -All matches resulting from your search have an E-value of: - 6.4259e-16 or smaller (match score = 1, error penalty = -2) - -Constructing index... - -Aligning all query sequences to database sequence... - 1 - 1, complement - -# Eps-matches : 4 - - diff --git a/test/data/consolidate/cli_test_input.sh b/test/data/consolidate/cli_test_input.sh index a535f9e9..b07bd590 100755 --- a/test/data/consolidate/cli_test_input.sh +++ b/test/data/consolidate/cli_test_input.sh @@ -16,21 +16,22 @@ query_file=query_e${errRate}.fasta for minLen in 50 do - stellar -e $errRate -l $minLen -v --suppress-runtime-printing -o ${minLen}overlap_full.gff $ref_file $query_file > /dev/null + stellar -e $errRate -l $minLen -v -o ${minLen}overlap_full.gff $ref_file $query_file > /dev/null for bin in 8 16 do - valik split $ref_file --reference-output ${bin}bins${minLen}overlap_reference_metadata.tsv \ - --segment-output ${bin}bins${minLen}overlap_segment_metadata.tsv \ + valik split $ref_file --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ + --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv \ --bins $bin --overlap $minLen - valik build --from-segments $ref_file --seg-path ${bin}bins${minLen}overlap_segment_metadata.tsv \ + valik build --from-segments $ref_file --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv \ --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ --window 15 --kmer 13 --output ${bin}index.ibf --size 10k errors=$(echo "($errRate*$minLen+0.5)/1;" | bc) valik search --index ${bin}index.ibf --query $query_file --pattern $minLen --error $errors \ - --output ${bin}bins${minLen}overlap_dream_all.gff --seg-path ${bin}bins${minLen}overlap_segment_metadata.tsv + --output ${bin}bins${minLen}overlap_dream_all.gff --ref-meta ${bin}bins${minLen}overlap_reference_metadata.tsv \ + --seg-meta ${bin}bins${minLen}overlap_segment_metadata.tsv rm ${bin}index.ibf rm ${bin}bins${minLen}overlap_segment_metadata.tsv diff --git a/test/data/create_output.sh b/test/data/create_output.sh index bc8d8eed..ce89ba53 100755 --- a/test/data/create_output.sh +++ b/test/data/create_output.sh @@ -23,4 +23,10 @@ echo "### Running valik search ###" echo "### Running valik consolidate ###" ./consolidate/cli_test_output.sh +export VALIK_STELLAR=stellar +export VALIK_MERGE=cat + +echo "### Running distributed DREAM-Stellar ###" +./dream/cli_test_output.sh + echo "### Finished ###" diff --git a/test/data/datasources.cmake b/test/data/datasources.cmake index 45d215c5..f37d9ce9 100644 --- a/test/data/datasources.cmake +++ b/test/data/datasources.cmake @@ -192,9 +192,6 @@ declare_datasource (FILE query.fq declare_datasource (FILE 16bins50overlap_dream_all.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_all.gff URL_HASH SHA256=e8f69893d25e86d51399b413a21d6885e4928c3bc1622244c789b30eb4016206) -declare_datasource (FILE 16bins50overlap_dream_all.gff.out - URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_all.gff.out - URL_HASH SHA256=6e469a784d3b6ce3e1ac3d60e7d504431fb4342e66d6b4e514f95e058496d5bb) declare_datasource (FILE 16bins50overlap_dream_consolidated.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/16bins50overlap_dream_consolidated.gff URL_HASH SHA256=545709097b8e55a685c132933c3363558c62d4ed1b71892f13de61f35d81c39b) @@ -207,9 +204,6 @@ declare_datasource (FILE 50overlap_full.gff declare_datasource (FILE 8bins50overlap_dream_all.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_all.gff URL_HASH SHA256=8786f8b22517bc35b28e3a0406450e6d782a3c47d6ce2ebe4424b1f8a0382fe4) -declare_datasource (FILE 8bins50overlap_dream_all.gff.out - URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_all.gff.out - URL_HASH SHA256=c66dc056d5d490a611733cb1d7fae89d785f7fdcd5048e5bbd202eac4afec5e7) declare_datasource (FILE 8bins50overlap_dream_consolidated.gff URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/8bins50overlap_dream_consolidated.gff URL_HASH SHA256=545709097b8e55a685c132933c3363558c62d4ed1b71892f13de61f35d81c39b) @@ -222,3 +216,59 @@ declare_datasource (FILE multi_seq_ref.fasta declare_datasource (FILE query_e0.06.fasta URL ${CMAKE_SOURCE_DIR}/test/data/consolidate/query_e0.06.fasta URL_HASH SHA256=b107238db9e0c0515b33fff570a787a86126789a0341bd2fdb52c6c607772f8d) + + +declare_datasource (FILE 16bins13window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff + URL_HASH SHA256=ae836328a087bfd08fbf03844798ef1f5dc8bfba52a188c08d35f1d755a99ac3) +declare_datasource (FILE 16bins13window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window1error.gff.out + URL_HASH SHA256=beaacc34bd952d8c754f9ab2539bde5ca4683424ba131161c47e4bd0f5cc4f9b) +declare_datasource (FILE 16bins13window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins13window.ibf + URL_HASH SHA256=0875cd2d90d1320f93c575f3197e7879a5b8e51c5c11de76975561d30e4cf324) +declare_datasource (FILE 16bins15window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff + URL_HASH SHA256=3581ca9b126e98991372990e0599f956430332e09c17aa07e8b607639441fbc2) +declare_datasource (FILE 16bins15window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window1error.gff.out + URL_HASH SHA256=27a5d4e972258d8c286aa78d5fce46d559f7263b4d5b2b32ee3ba65ff342e7bf) +declare_datasource (FILE 16bins15window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/16bins15window.ibf + URL_HASH SHA256=7f1ce2bbdf8d657da29d39879ab23c68cb19dbe0b58c69a9c5a576f6528ad24c) +declare_datasource (FILE 4bins13window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff + URL_HASH SHA256=00466ddefd51171c59d7e98fab1c1735758ae741f55f2731223b683ba6d667ab) +declare_datasource (FILE 4bins13window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window1error.gff.out + URL_HASH SHA256=7a9eb5a96dc362ca16b28475fda5000b2d60f065209f9653e42473faa58041b9) +declare_datasource (FILE 4bins13window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins13window.ibf + URL_HASH SHA256=b08ec3c196dc45faf73c24b86113e2c89adaf3d1844799d646e25dc0e77ac6bb) +declare_datasource (FILE 4bins15window1error.gff + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff + URL_HASH SHA256=00466ddefd51171c59d7e98fab1c1735758ae741f55f2731223b683ba6d667ab) +declare_datasource (FILE 4bins15window1error.gff.out + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window1error.gff.out + URL_HASH SHA256=7a9eb5a96dc362ca16b28475fda5000b2d60f065209f9653e42473faa58041b9) +declare_datasource (FILE 4bins15window.ibf + URL ${CMAKE_SOURCE_DIR}/test/data/dream/4bins15window.ibf + URL_HASH SHA256=8b584d0e55043b3cc0835674dc83f5a7db6143645071f49973a1d085ac4fb919) +declare_datasource (FILE dummy_reads.fastq + URL ${CMAKE_SOURCE_DIR}/test/data/dream/dummy_reads.fastq + URL_HASH SHA256=f1aa9ca0fb0b87393923848f0389cc3fb5cfd4841566afaf72e6c55829b64d73) +declare_datasource (FILE query.fastq + URL ${CMAKE_SOURCE_DIR}/test/data/dream/query.fastq + URL_HASH SHA256=f6df8e312ed67e8044ae2c495259f3bf1eff7a7293b33a6b2d05203218f9dc0c) +declare_datasource (FILE ref.fasta + URL ${CMAKE_SOURCE_DIR}/test/data/dream/ref.fasta + URL_HASH SHA256=30ed460bfe4838a6ce3e97dff22b42c9312b0c801c8d671dbe82f6abca265f4e) +declare_datasource (FILE ref_meta.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/ref_meta.txt + URL_HASH SHA256=cfaea330c4abde12e75cec5ae8b74ffd985d2b1d4ad1620b72e064f17488e1d5) +declare_datasource (FILE seg_meta150overlap16bins.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/seg_meta150overlap16bins.txt + URL_HASH SHA256=b19d2082c26c72fa58af17d1e0b8220dbf13f71a1aa0aafc145c76bfbd366d1e) +declare_datasource (FILE seg_meta150overlap4bins.txt + URL ${CMAKE_SOURCE_DIR}/test/data/dream/seg_meta150overlap4bins.txt + URL_HASH SHA256=fb1ebef2e9d5a75272ec11f20d4b3a77b9b647bbdbabaf7c4d9f8cfbd2ff5f31) diff --git a/test/data/dream/16bins13window.ibf b/test/data/dream/16bins13window.ibf new file mode 100644 index 0000000000000000000000000000000000000000..057c65be7d0e85e5afa4d4a99f1a140a9056216c GIT binary patch literal 32891 zcmaLg52&Blc^~j!jS*3mg>WgCV1@cBQMb9R6IZ>x^j$o^#&!z51!yZ1zvC{r{8yz&CHN`s}AVxHI+RAOFOA zK5^tTj~;n@5JTWBh+j_K0Jq$zPX}hRTd!9S|7>Ab-coRLU%{ns6+C-q!S*{<>DA8_ zynRQ(mESMeeupZpy)~FAP&G_WA1TE2>BV-n$?AgGoAHkQbRpWjd#b+t@eqy$$Berx zt2fU)>s^0m(e2JUuH2S>8SlbU)%)Mg*9wCl?Mn|-Y3q)H^A8kUDsdy5&4v^M&HwUp z;dlOdVEEqo?y6sUsNnQe!Qo2e~e$nlV+&=fVv|SFI3Lg7|g5zC*S^w~kMLath zzgBp^J0DjH91EVkx8U;8-~)DMU9emKtxEg8TX6GSLH3F6Li95`QW$a_!!Nj-+&aDn z_B2-iH{NUHg7^YktMRjLcDeO08pT3^-pRkoG4tOZJArSVhbpZ`U*oe?--@5y_GH!B z2RVQZUt}}#p|?|il>VLkn;*w7mf{ci+xx-vyRYD(_|2WMC-S-{b^OofMZbK1L43}p zt{#2P#gU=fcmK1{jRp69A@u31-8ek^>z_jEt{a zNB`+HTTZ+hKNZZLg4PY5x~fOP=6{_W;1{W{g~7y^*;p;h=L;T>{m+J~``!7ml)}$0 z@-q&?n?QarOMmO^=V4bj8K$AvzFWoJzZKM2aA)`u*NsnZ(=QdB{ZTv_|2F+^DM)U^ zhpSG1Ytc_(xBu$*^xkUYkN6q?uJGh>Ao}9>R};5~#GN+7SS9PAKl_ik3r=5ZJ+BuY zOx&>UjqI;`A6cZ^$bQEz&1YQo)@!QL5dUxX9X-nzP97>ce)LNA+e`nd@ZzPo#V(&- zt@cAdQ_!#dmwsFS5UkQu_|Q*Wp>KN7zAN@)o%A_mzhN)y&s2}zPmLq~s2lH_e^!_# zCj{UTg@T78r`_MG`h$^|{b^(0B0W2$M|oGH8e8dl!FtG%|M4U80rg({>ncrm!xSW6 z>us%LmFU0Wt#>GleOvoH@kPH!yM2gV@&o>LGx@&%Lp8GfnttVzZSwQ1zXERggbaZ8np??S2ww5$=9qvER}7@szl^2Z|dKVcPeq)xa>&$7MIwKy755c(ueUjezPdX$jLcLKl#IfFNSWV;O(CX z#Z=JvcSMi$3R*Y0rT5JF$o^p*`yhaQeW3Y(VLzpp)%%T5P#(DdcNX!+mB-2tZ<7ayT)wIIxRTNsxF;}REAgwBtNs1y zpDRN9e%rTVAM|8B2l)H2IL|n*y10L=JFi(E`)Kj*uIjfpaZa9xp8wLP{@TZ<7Wq&k zNB!u#`3=HBVG-)L^km@=K30$&;9orL_XYBzpF@cw4{5ej)TT-W0tr_PVwo z-O^(g>(*u8LrTkR!{AND>l-XzO zx78JfDwzMpLshp=%A@I1x1;eFb@naqc)K?#z|+8w00;i`#L*2_(t`>&o=+ffX;d1k$LPF^25EqUoEF|K6EbJ zo&8cj^Sh6L2b)~R!k`yNkN%+W_7VN?)9QCuX*1^za&kTc(f5!)41ZiL{Fr{1&KDkE zY?lk96aQ?8<2FujxnLGseEr zJsSIBZ~Wpyo?<$WyO&{4^Y{(9AOBE|C@%jn=RA5epLMIR-Cw17;u*W8M|9+{FZ005 zC)~r~2cPidr=NAQC(n&uyEhb_eLNpM9giR3j~$qo{Fa&ptGeeDQs&w|_T^}2XQe|oWF`_6qul`V} z;Dwy4V8z*^d0)fx2JQIr-a>C*?mfisi~pv2dj9LVfOzoUFIIbh?!FXzaxNF|8XqY% z`D+-eFukK7`ERApey_Z*+MUzTwf&@;+r6M_lkZrVwdj)^+|w<=Kda{=B=@t?U$d)_ zo%q=*(6`?B?D6zWPLJjsJSGo-CwF*uY5Zf~sgnD#ecgTI{%#*X6h04RKd}B|$p?(P zJLlo)uENaeV?fctX|Zm-^XwP+UKe*lPL3_#4xT-J*?m>;;<2ZxlT@vj}+V&Ixp>d zyc*p1w)}l8yg0FPECU3Pzk57!*|`WcdD+jcs31JOw0g*zTy8OP!4_A8wl9lc zueq<$!d}R0`kI!&_z8d4 zI$o}l^MQQj^?M6HqO0t z(6P_czl!1???--(kC2c5X|dmxZ&bbUFP=_16nsAGZvLO<$0Dy$zrf~ChS{S~5MRzo z_6PBazS66`uVlXGKU*!mp3sjgRU%LKN*7|6@?!Mt-nkoI98tHQ(}RAm{%I(tf?Mum zgT{w`Ee?$#2)ONmDy)C9Ah{gaP^)c-OF3h%`#8_K9=_W`HbgZ+MR>gl{j_gcSA80Z0il0-}9S3zJ%H1;xje+(Rz5-l*{tezHgL1FAy^Vq}Ye<-~7tnHuX;jiBJ5O1c~5&e_H&OD!> zkM{m@80zf-pX^(|L%*)&{}0ENY8OZGD=vx0^PJ~eUepbH@v|LbsQO30yohi8_Y{8P zKNoE48mex8Y4#2uxsek-#kr$zt^R(+!#>Z`elsY3is$G~N8jq~PkmSJGt}vWpTM(s z_SIkr+Wa~M-jn;2)h7yJUqeQY_J8LW?eawD@P1x?#NYAJ))~DSuZQO657yt)$vfm_ zo_G2V#Ja>q{zTu6pA5YF$tp}a53pl-Oq+jbrRxRf#6|42+Ef_NEjIqJqqv;oU&mI|}dnPW$LL*9-4gKCjMh?4R4@JvPcJ3?pMc;bCj`kFmkbS+L*wfUnez7OUe58n5z^W=f>_G@zEALB|UDA&0iV+)^{>Zyx&e{*?85enKuo@*&S3`uza^o>MNLFU;wyS^~#{?zQbB zO+WGb3#W>1j9oUn84BruN`#$zTZ`Ml=&pRKCnJ@O#_{hZgcHfF0 zaO$z2)9YvYy`r>#xX(AQ zILJ@rRrBn_P0nMDv+KTs7xNsVeeaNV_fthRoAviu^lyH2{7D`3-n)BXc>YdL#zjw$ z?4Z}5ajbjuuL^@szV2QLz4)NsL-7ut9@)XpF7Kre?c#<0p7X2sa1^f+Z@0DoU199o z+S!G=b+ZR`>vu17=;`XQ9Dnuv!E;J})W&^0rSM1Z?t?y``!eg0KWr{nKlz3C>(I&n z=&zMYsAyLG8W%(mG=wQ{=xlarD(^ih@7dXY|{4?n%Si`4lFU z1>K9lzL@>uVD53@r!Q5>zK?%-z4cq)JnzN6{?6+6mE4=nKTvh|clcjJ8k+V30pR0M!FJ8(|hTJ<|$UO{teuWSI1V5XjEGQ0WXNS&1*7wG>zZmnA z|9X8C_S)}QC4NUv@T++)A)dCsV-Yzz&$RKnf^*LDt<|vm@q*&9{0aY_Q_82;-d6O^ zp^d-HbFMpo;9H#O_gU*O9zMM9y7uw#nf3M0$un*H?zYdr#Ot%44CNFUyVM`#H{vP& z#V>T)+0Q+Zi~MXoazNMgX#jAj!qO8E1^YO1?&k_`JbX1jJhIqd``W)=>?dv-XGr|$ z*LU)G^<&TCMIX1D-|s2)>3TtaC?6s3M%B~5 zl$PjmHPmIXC8SWq75{uN*L3H~R)C#Q_kz_?OH^i6M5NCNF^ z*2B)&|K@)Uy>6XdPj^>?_-LIgIe*EgjgN27)h>1Khl(fkC7v&GaiiADpS;f9TQsM0 zPCWVUs@o5(+jG*j^VQzwzcIx^sqKSPp{*C67oD?Wck;tSUHt9&Hpwg52R3q!BM0~V z?eC-L5ZL$_3&D=)bK_%$N5^l(om1gE!?kl5|9~H|t|o^;OTm^eC2w+%i;6t1{zYM& z580{pqF1NK>C@H1uj$)5TfY?8q4Qhc{?WvtHh&1%h56e&D@AbNfr9MVI>|S~%$9dl ziM)*Sz7G|Cp7=`+^uRvkIS>Du5ewP}tdAbegL+Jya_+Q0JeNG!`kG(Z57LkSHoq8* zT##JdTeQ}(N<-p>e2@Rn=GE>#>)^$z`yJEId7a#>mwwv_E(pP_ep##f^r zs^qz)Pqlc`Vb-@J+t%ecoIvivGQZi>rDTq$hu; zmOsti3rQ_jK)^#sGoz1FfGr zzSyJohR^z+%lBZtJ&9kwJ8F7~9_X><-HH43a%cXop6>|6nQd_o!-1_mb>kt!zdHMB zFb(5M753#^z#quLdhP3=ziYDpTh-rlO>r;7&sHB%DrjGZmA~;Tc(C~cL*wHdd+UgV=Bzt{(t~XyZe#0 zj|Ts8m*4Ns`LxX!I`Pb~?8Lh9&0pDve7E5xx#*sf+jZSViTOG^ibs`I1iQ&pE=d!OAobMFVLy~%g1y5G+yzvc(x0=c*cChy-! z9J4NT#sk-#*N4Ke_vQPmu0Q(s@2Ju__r%E`4xTD}JC~<}`*i)~Q`T?)(+;v9_TKPa zzS8h3Rl;wx%P|z`=RJq@+ykRGkNCu1nm#g&_aC2sG>8E9xQM;>`d#)pc=t%=%f!U09}>6iNBs-;HM*R&&eA;^RJ#S41d)wzZbvZ z!T$cD={w_kUlF}~X7zqPd7ke;ozs@WAG_3l+dB7Di#!NjI}Zgh@8TgpWDlM{JpI?z z;nmzf(I5T%Lhhsedj#$y?ZflzKkonhonLXZJ?|W99(GJF*2g}>egn?yYl&n0iamIb zt?k?Ci7x{=-?VyXzx{VS==cM9oY#+o{&7JyQ+utwf^3R zmnWdt&c255cW!UBwC@+ks*|_z$#sFpQ$?gBsY-U^oDoeq3-=}`w+R|w;#uNy&R3_ zIR`p+rrtw;Uk2WM#-n#}2c0@Qg{Mz-^EbMwTI6Z)<^x~Mb7XpSo+h7Ha*u_N+w+{_ zu@g1E{e@r3w~W7fKMo6ep1p|ue`<+;Fzw&TxC~QJp8ja=bF7bjp|&o1cdoF{P4QRs zo*%3G?$P-0>_Wafu2jp(uNUn99jDf>E7&LGH{^)SJk!{Kk8seLU{-9Q~&3gU6ERwgQHWRMHoiUhJg4CgFIK6?YnQKxQ}FoH?_Wkf^KXY@C}=(8u=+&d z$phbO@hAGsDDCfbuz!BZ4jz0c^L$E=g6xIeA5A=dU-Ec*nq|KyU!!<_hrh2RUiiCD z&fVm&c0N6)f@ku6mOQ}vIGQ^(k?^korzmfZu`LXKnS6@lk5*kjP~OP@*(d(^7d@%Bd}49B8-EXfNJI2=;=!tWugf@X z+}M-v*7(6j_BnB%Jow@9__sXVJr=gkMW5;*I&qpD*}L;PKJ^pVrn?F=o+^@7wM7J^sSJ!i@3TWpSw^wEpp>#RmQ9dn55d-t1hw*lRX> zp29%0Y&lb+tV;%kco-T9K-n_R~l$@*IRPpv*G_~rYP zHhv23A*@sTsr0TbbpW8Ox zSeUlIcLd%~_4j+p3mHG}{ZF2Z&N$*BI|b1V(Gz{Fc|Mf9#CgoVre6>3|MQPji~SuP zdpBRNKl1Z_Bsp7u%h#qFv8{K=48e;l{NBE;?)#Y&(Vu&@$D#CfK47_1_&Tmer(^N5BG=SOdDsY0bbAdO8n4$e48cv{~7B^ zgE-ds4V^kZjPv6+t8_Nc3Cu@N_F?+Or}yRDOS4aUao=gZ+RY2vpVh??e7qR{#W%gU z2lGC|5PvXF?>~K9kuRBdZ_bmO|2vo=a8JRt9jTLN@a7d?`FW^k_BDB{`(^E|->roa z2b;X_D!jPS`)51njcvs1g%O`Yk55&>d*SS|AJ5;H;m4Q0S^a$PN)OHp^ow8dfAdoH z8>5%7G20tEqbK<#`$XsOeyTS(w9@s0_`d$Z!qcBBt=e_o4ys88Mby`N9K^xdWYrX{Dte_0rQy^_39y^lNa z{M`CMa+e304_<$If#-kf?VJ+-w+~pqasB@+@_5^}9-eLqYaoUv2Up y7u)GyzQ&J=>e_GTm+;T!dB9foVf)tFlSScs9&ypQ=d?s)RIkjF)SyNoO+*A!CY!c__v7;fyBHp;X#5 z4r-Ka$`CqDAHsbIB_lGyi1<(t9O7etgdsE{Qt}|B5xGO-gKsLKNPWQ3;bk_g1_g;JLwfArTe)sev!!Z2J;s4(M@23I`|9g*+j~Lt#{mfHOf9mOrUw!uC z-C`8ML5yD&je|QD^mt|%?mQwl;7a)7142f@4yYGHaL-)7fSw+C$>c1qB!n+?jHk0(BcM!=j8zR3G9liatq2~Qe;~Ln@y8-Dx^4|E7?`nS8Ki~sdKg??^$3{367&N<-STrj#il;{_{f8-!>yZP$Ks6mgw{uIU7-hLUx#++<4;q1>F_kIv>_&mpX9Lc*gHm|3Cr)>v6 zlk-3Qd$rYv%6^N1ANE7^R>Be*^tY!^JwSfH#xxa5>@|Vnzd=tCQOulV&hh+a^KK8XAx;lE~Pj{Y5SYG`^z_e3;JubB4^8+9KUYyYD zL7u;;cfESjvv+>^^_D~a;NSNJ;^p)6IgfLZ-_E>|4{htiKz!hnFVH~r-qAh|fBcoe zUetVXJ{*cPYQU=c!+0efQV(_Of9{8p(+~WmA4AYHc9O)~_dW8Mem-pV>%<%3;<}y} zdhIY@&zn8%W=DinyT9uB#g6{Xe?K@~eTygdK-O2r`67~lS>IdsK)v#FH~qq|nGOH* zso-p!4Tyf~t>`a$Cu9#zdp@5}+P+JEvL13iUd2Rct6K;L2Et?(kqXL zVB>d9VKf@j_*+jQ2uQm#?m1fV+7#pa(a+0dyEXmxXIgqCRYHTj@Ov)oiT{1XO>xS3 zsL})$(H+1%vTpO7CHqTX48wbAPkzXk=YA80;F{pY&D*mpo%0vX(I03a_nFK)<0QWZ zn>Wr0{+yZjM-|V@A@k>T8UFbngxwk41F=83w^MJ5Gqw`ui~QZua~l0+U-R=I?YG_* zRGlsG4`Q4n*taKfR{mjwoIHi!+ZT~LEZ-7m42a*&bEau>;(2XPdL`f<@fLgjeIe>U z{>9Dulk}L_5z>7gtD!f0G$45w)G#nl(D^<=4DSc~$CF=>!WeMfc#l54+8O3rp_BcwS>Z-LLy`IeycR6zG5ITmD|Ao~Od` zNXEBHuLy1n3hdn}q_lkM#J=vgOgdmkzo~<}H(hf%J=r&xs}0X;+_Xc#i1T^sb?rD1 zoB5{SU*i4I3Om~C%{Q&mKiYwxuq5`O5Z)UxZg+A+^E9{f19{;5gBADUd43z!5>O;{ z-0QcuBlThOKCFbq>oNLs?H|xU_8E`!k9Dr$JdpbshAW`gzxj7%Py(`UantoLee`O? z#d9god2YYZ>|izc)PW1ycQ2@)S(jy=&!Oh-pJi8PCRmdk$!qB!Kx^I(9t?r+H=JLx znK7{=#N$gIun%zF+x&&r&Z)O9w4BTdfvHW%d|#~kGEs0wX&GLxcJG(Gwt7&L zbCc<1USQy`1S9L&7vvHGTy?GuKozpy@83+3l zjyQina}NW(3U;5bWk_5n{gK?~i~2#kJfA>2WZihadR*Mj*Q?8&rrtA-9eb`opYgKq zQ#W4uH8eU<^NoMB{JHqj$&h6Ki!^~X=o_C5==CSKi|_x)3y*vH$3XDl`^M;yjDz_> z&F2YzodxFoeM`@?lka0o=sTbIhqV9iyLNwYVD&sLYhXSO2qjQBA=&YR@-WPRhH zfEYCXJ$JQo-Ybw_wC{O;UFW^l`tWsB=N!Vk^S-M$Zl5Qohg;Di&qt%a|D;_%Kd{fT zZ({HLhGsoN1AYAP6V|{ke=}gVhl1pVJQs~P?>mzb2>t2J^Q0ZwS3O5@ZuWZY^G>rs zf36_D$cOE~$jjslDLluCo;;TnvrwHf?%?|h@?QQRr~W|WhjruUAAj$Ko$nLG``@1n zjY)pWFx$bAU_CH~NcY1&oZ8C!q(N#RPpE#w#vc$qkbHh!_gU~AGH)Jl@&W&KCd(4g z^MZWL{Y`qZ8t|WaqkOc~NMvxZr9-!qeo}X?! z6cW!D;_G&vx4b8$zM@aOGp{w?)Em(ZILXUma-Ac2erlzM)M%U%G;#FtUHj`+pL}F} zG4JQK&Zsw7y}sgk%<~VsF03Khe59+<2*`KV2m2*T>`(cgv6FATe)|09ywQLlaWeMe zyO1)G95GmLNPT|jm)Ae^dk{O9zhd7{dB0Ei`~4aovL3j@QHg^{Vc)AIEa>77Z4Ai=TjP*Kh6!<&Gh`>>#1iCpLIvOocoY}Sm@6g>BrUs z{YJj4a}?u%?~u3yd+P){^q_&n4?3UYt)KLZdr98dz-BxsNB~QbW<@t=B_JdYr%bUWfX`^LEbP#DN&4CkX!4k9YL8 zkGFFv<92?Xiz9#NWxO#M17ZIBoJ9TDycv3oJL6h{U-W#6Jh;dFJ4yyL)y=1jAu{)o zDn=)7=Jvcne|NQx@m}@4Ff@>RA!yXuY*o!@z#^_Me5s*BSK&JN64`Anil(p--rO5GVNgcjhCUfLnLAG}K)8@zj6zJKCFk--KZeWHL_iSx=}F zH~P!DV_)|)___a|(&7j~A8wvAv`)((ha~HDLDOIjn)xB$(Bk<9ntHKo_ek*3<2zUS z55Fjn-}*Z^0ew8wfn0C%R-e2O!6#1I{G&}qeC+^$!G_Ux{ziWWuu(k~LiR6xn9gAc? zrR8-7$&#UC8sV~TJfxeIN9G%=682g0tlQzWPM`S#t2TeSQ z3vM3%gTOBTOF;4ndBRaxNzSndesY~lJNsk%9P87aa_>W(OnD8=$AjGS7ykY)f(7(C zozg1oVsefU#znpLdFQ*&)@t!R1LJo&^yW>lfZP*$z0Nw=drz)@&s7)FYYoi(vF~yZ zoccmGe=iWmU+w(m>vJ++@3Kwd-Ot+mQI^WXRrB)R7uuHmy>=@d zhJ8Dau6ho}o^v`$>^qj9=5Amg^1QkFLuo3WtV_;;?x)&46Lkx{Nqs6vRzS7y^*`5%=J`PPsEmVo z1$rw*buIn4f@t;)>S0GM+30g__H|0#V;>}M{C$G=n||_KmJK?hczHj#??D}ZzP^yJ z3;CTT_Q{@I=EE2o*?!MpO+NF;dF{gCFJ-RtA9huFKpc4Q(|aCw-}r%lRqv&Jog&Zn zVDzB!I>jZi7U}Sb2kVw~k;ZN(n)g4Mr>b`o`*gG(g5O^K%=q6C{_4#9R?&hUU-D~Y zKBfNY-wI@|^AqiJj-jrw4!QS{`Qp2XWGqLj)*bnS)>ZxvvcNGYyH)F&?{<21h&=T1 z5A$e`aryp0KGy4@$2`*xafKgRi*qhC_Ycs({5=ni6aJ(wO2GGr%v0(M$=`p|enY>2 z=w}>8ILAZAInk=Y!5{HLk37ou3zFq9p9d9>{CihZr#;q%i8C*0mwxo>D)TwqWcel{J`DeW7(a!VyojL;MMRc+H{quXJyTpAA3+i6lUrg)>;hwwY_NjIA1rNkC_lq$k zMi`q_wEQ*!77EZRrKEpZ>3Xe z@?Ej=1Wo20I^h^t=BWfepX7P!$&UI9-{bb!SHq#_JM;9;o*EQz`;?Z%8if59CjEe( zK;LIkxCPQeerE)~xlW~)ag|@*OH#*Juj#cUc7))! ze&o6-tuA%b$JJ^bQW_ug=5pddKi_`HHAEM|F<|yfZ2x^}L&r9zQX7smy z4^F_Gw;D9x#f~qvdQLs5_Y=hBG4hmgy8(0wg!#=nh}>ZfhUc-XWnQ>oO`13YduI8Z zUP6QMPxi|y52IiY)DV2;uAahjKJ$sR7oUtXzhj4iK_7ST{@cuw^t#}5Dz1r?yF-z6 z!+w1AndJ_A|C(sk3FnBh|8(%Vr*SRnpV!4+dz|y|<8g)0zMXly9TI59oBD>#bv~tT zW_?d(DNBABe z`rC^)a^jxxsCaB4=Qj2)+Vk^h`jNTQllyp$zs}qCh!guo#<^8X>IZ)x=xd)$))7)? zLG%8OxVBo0-w|>?b%pV|9NNDd=bj3=$ANjtd8naTw9b)Z--Dc&T%P~G7e$iofIeT$ zKkX9dJwH!b1Ml{pGo?5GdLYC*_tg^o$@)|tl4^{)z2HG<()cO#92(u0xVltA9G1soU+wJ!D z@Bjb)gTFsM^4neT^~Jw`d-(Kl|KsQV^Go=8;qAlYKR#bv!L$GT*O%MvG4}EjUtIWp z;3eN)WiJ1Y;X*!hoQ=O|4$X0W(EQD1aKC-@`ukpUc{$vENSCYbE?A%Ypt)YSbiI(z z9k6pg@3Y@NI`o|uw?`gtZ|CmdcPAgLkM16G9Mbu_r}vr5LAqY(UdRXOu;0;LbUAm~ z-+gk>9MbjavB#XgeCEx`L%xOH!|zTy4t+0kxpH^&yHk%`eK;QuLd-Z!C-^!aePlxtEdvLym{O+>PUEV3L*Bpm@`e5hw z+Q%nvj=O`7L-&|>AD?}Y&m8J0caL26J~bz=AD8bQ_nP}&xH%oy>rNy(nKl9qNH}Ip{m#_R*We`S9ku7l(4b107b^ zZ{FX<+}$`oUNpC_@1jF@%R#+3pPYO7>@mmXAwM5%KV8mus1LVKe|2-ceC_YM`0VE| z>WB5y^}9oldm(?3AL?^wQJ#+5qX&m{IqxxteCD|K*@v6+!|M8>JM?(Besf$O|LSQD zZ?1X1b%(q=p!dq@t-tS(Z%&@>E@&>V*FHW-cPFl2zP<9$J{;=7+hZ>s+6&!f-oET;5(B%H#6-%=JNc)Q`*Q>3+O^z4G0MH%Hei zkMkG3n;(bncUSXt`{fqr*JE$@(2H`=+&l5^ahEv`?bi#ddl!_4bh&c9&^>(g_LcK@ z&&u;dKHmxIgYL5*@bT(2HJ@22ya)4NAM zzjxHvd2{y2+lRYP4z{;BdHwwI?u7h&P+qQ_4)s7i-U013m)BpPxn6S|`VM;QJ8?OE zP|h8=eR%Jem-B8qZccXx)Nfz?=KOm2pgDhibl9Bt>2VLNANNlC%K7v|d*pf-UEbV1 z^3~<&(BAg()Ad68VEg3zZh5^>9_Q0zk3N3)^O-kqUVruGdQZ7Kn$t^%_CtEnT#t7_ zcbK~aUvqiiQ?C1Qe!cF}-yZX#KKsn+`s}0g*}rso^M!m+ue%o4XO4TnK6>vqw+Hg! z-hbL7*Iqe2dg<$4cSAXQrhY!;E=z#BybG@D8_7%=xc|LR8 zJ?^NE>*0s?K)&w5va$0gYx|L@YDIttJ}+`*E{6&R+rOjt{3Oi z1LYQ%FY>q7-uCkAG50>4Pmlg`y6+{Y2lAQYo$H~y3(D7r!}inlLO%Vt?^A9+G?#~+ zm-D#;=d-_e<9tQ;=*40Eeoy=Px`z(y?>>3Ha_@7GUOn!@&7nN52X?Lp^1;=^Zw~2@ z?!7p)$6a#JoFDRcE?1OuM{|A$zLoFymuoMdxjx(;Ip{9P*Z1<kydfP+iFUorlZV!}eAAj}bx5r%mbhq!z=Pr3%FMs#Z^_=eJ(_6GxuY1a?%jq*; z=uW+I-bZ&YfBWR<>kfG+XOEnHknS$Iay{?&P=ksodSo={1LX?Z+WqpZ9lO zU+41OtIv0DkGu61`C3rSSJM5*qUmvaqhkW+p_T!LG9-6xcn#1bu>OOnueDe13 z;l6_$-F`aWK036wd-UpS55FG%a`&3+lgIh&q070m{Wz46Mf=<(&xiBT-LrC? z^TEDj`Mtyb=H=Qa$L}3{a^~Iz`HSZ6>0G~FbNisYJN3%T@!@>><<03h)Wh!%ecelk za?l=}uRVP3D4Ih(?p>Up59f#Wc`vNLoDb@QdhN%dJ}3wIpzm0}IUUy{=T68E>HN_B zP(Q2>x1SI9``b&euX=mkqerhE?=gpZA)g$deK@~8c(H!H?#1QoU-SCqpgg2QdhhZ+ zKJ$hA?tyfBpk6+@ez`TLpL}}P?_v)1`QGj^x7WPfyYxUl*j_nv9MbKj_g#GY`Fe*Q z{=Tn%KDvI~9eU~Z%iBZeH$S1A{kWWd`*C~Z%pqNmUUT|Fc{x6Lci`sy@{kXAKfk;= z4&}?`pWlrQJEM_F&VI^)%P_bS}q-_x`od{@$fWFCWyew|P20&gUKU`s_dLk>khp z$`|Wfem?!~#(f`Lj~whAcNd>NJ@(P%_|0+XJIm>lx0es{>ya~ubjWXR53J7~x*Q#v zw+H95pC9)7(D{9brOQM6?59J$tB(%(^|`0IeskZeJ#yxH>Ghk_?ZNGX-fzDAeD-z^ zZmzFfPvps2oa{W**t{2w}^~t#pniuuaalJ)zdvHFwzM}WL zOCHz5XHM5g#~~kVFP$IiXW1EldI0({_f?|XP-WE+`PT(4!I(~ zdseR;KXjKn+QVnA$DaDl`OWR=oX;M;e7L+me);8>=hMTl2j|1>lY{2^ak%~ts@vBd zyy#xKJl%e1U+!X{Psh4$nmYc_3L*pY+f(FeSCDtZ|+{)9ejBAn?pWa z??QJjzdm>Aq2mj^*B-vUqnzI!b9p-Me)~F?*9-aNAYDH+$L)pm`smR9-epc--`yPQ zgYIA49(VH7as7QCZr+@`&H3AF4&5W~ey9i6hr0vU1L=C~mD6WlefprgI`=MlK6<%4bca23IlX-5{GIce>uC<>$Mwm32cJ2e&mP>IE|1%{bUwci z&hHLAeCF=J<@r13GcTG$d3#}V-Ro|7eR6vG{mkX`7WKjA^wZ_}adUUz=KT6_K3G3r zd#mG+Umxy!;PyiPa(DYZ?BRoSJvh{Z%kjGpn)5@toL=1f^iO$k&`XUB1W%`SJSA>D}+Un)CZUbZCyt>xJgaZ|-h6eyG2GTuu+YyzenD+6z0! zJIC#DH@)cZ%KiNM^>`2D*T)a(P_BLUnU^=u=WhGlV-AFcelKEnd9gC71r`-3V&H9S1NH00eHZznyj zoDZ7YOV@*&_xs30_d>cnuD{&->@jctq?^0nJ@)E@d~$Rg^0|`^^yL_1ed;57HqW+6(pJeC}uuF4z0yi|)5a56(}wU%xpH z-NDb-Ub=qs=JdH2hkE4cdg$&ew_gwBGndC9zd7z+oS(1#_4R&vIaptNdIv7gk3;*Q zyZL=*b9b22o72ya7xlN#{dAmPuen}xcXh7EoWE#(>eUaMqwBYyzEF?vD(4<^y6-{f iE817Rx%SDy&fV#qxIN``dz+&}`}DX2>MP3A<^KTv3y6jQ literal 0 HcmV?d00001 diff --git a/test/data/dream/4bins13window1error.gff b/test/data/dream/4bins13window1error.gff new file mode 100644 index 00000000..21e1814a --- /dev/null +++ b/test/data/dream/4bins13window1error.gff @@ -0,0 +1,80 @@ +chr2 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=15G,56C,113T +chr2 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=68A,79T,142T +chr2 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=16G,23C,67A +chr2 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.40284e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr2 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=1M1I148M;mutations=2G,103G,124T +chr1 Stellar eps-matches 974 1023 98 + . 2;seq2Range=9,58;eValue=2.35246e-20;cigar=50M;mutations=8A +chr1 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=56G,79A,94G +chr3 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=8A,16A,113C +chr3 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=19G,49T,67T +chr3 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 966 1115 98 + . 2;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=8A,16A,113C +chr2 Stellar eps-matches 1141 1290 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=19G,49T,67T +chr2 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45A,105C,139C +chr2 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=32G,74C,101C +chr2 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 1100 1249 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=32G,74C,101C +chr3 Stellar eps-matches 1128 1277 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=45G,67C,124C +chr3 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 94 243 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=45A,105C,139C +chr1 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=46G,110T,119T +chr1 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=2D150M;mutations=111G +chr1 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=5.72428e-70;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=87C,113A,136A +chr2 Stellar eps-matches 509 658 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46G,110T,119T +chr2 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=2D150M;mutations=111G +chr2 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.29834e-69;cigar=150M2D;mutations=111G +chr3 Stellar eps-matches 1842 1991 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 2154 2303 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67T +chr3 Stellar eps-matches 1954 2103 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,67G,93G +chr2 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.40284e-68;cigar=1M1D148M;mutations=67A,141T +chr2 Stellar eps-matches 2170 2300 98.4732 + . 2;seq2Range=17,147;eValue=2.60601e-60;cigar=131M;mutations=7C,51T +chr2 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1125 1274 98 + . 1;seq2Range=2,150;eValue=1.85121e-68;cigar=1M1D148M;mutations=67A,141T +chr3 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=3G,32G,69C +chr3 Stellar eps-matches 137 288 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=2D150M;mutations=111G +chr3 Stellar eps-matches 135 286 98.0263 - . 9;seq2Range=1,150;eValue=1.71331e-69;cigar=150M2D;mutations=111G +chr2 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=3G,32G,69C +chr2 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=12C,66A,101C +chr2 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 2147 2296 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=49T,121A,139G +chr3 Stellar eps-matches 1775 1924 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=12C,66A,101C +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 2482 2631 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=9C,79G,94G +chr2 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=46T,47C,79T +chr2 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,111G,120T +chr1 Stellar eps-matches 858 1007 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=3G,32G,69C +chr1 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 14 163 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=72T,87T,90G +chr3 Stellar eps-matches 1601 1750 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=46T,47C,79T +chr3 Stellar eps-matches 1003 1152 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,111G,120T +chr3 Stellar eps-matches 1375 1524 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=87C,113A,136A +chr3 Stellar eps-matches 841 989 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=1M1I148M;mutations=2G,103G,124T +chr3 Stellar eps-matches 753 902 98 - . 9;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=56G,79A,94G +chr1 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=47C,72T,74C +chr1 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=110G,115G,123T +chr1 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=53C,61T,111G +chr2 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=47C,72T,74C +chr2 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=110G,115G,123T +chr2 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=53C,61T,111G +chr3 Stellar eps-matches 122 271 98 - . 6;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=47C,72T,74C +chr3 Stellar eps-matches 556 705 98 - . 7;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=110G,115G,123T +chr3 Stellar eps-matches 181 330 98 - . 8;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=53C,61T,111G +chr1 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=63C,113A,139G +chr1 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=69G,74A,92G +chr1 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=14A,90G,124T +chr2 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=63C,113A,139G +chr2 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=69G,74A,92G +chr2 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.40284e-68;cigar=150M;mutations=14A,90G,124T +chr3 Stellar eps-matches 283 432 98 + . 3;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=63C,113A,139G +chr3 Stellar eps-matches 612 761 98 + . 4;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=69G,74A,92G +chr3 Stellar eps-matches 495 644 98 - . 5;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=14A,90G,124T +chr1 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=15G,56C,113T +chr1 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=68A,79T,142T +chr1 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=6.18503e-69;cigar=150M;mutations=16G,23C,67A +chr3 Stellar eps-matches 559 708 98 + . 0;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=15G,56C,113T +chr3 Stellar eps-matches 11 160 98 + . 1;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=68A,79T,142T +chr3 Stellar eps-matches 654 803 98 + . 2;seq2Range=1,150;eValue=1.85121e-68;cigar=150M;mutations=16G,23C,67A diff --git a/test/data/dream/4bins13window1error.gff.out b/test/data/dream/4bins13window1error.gff.out new file mode 100644 index 00000000..a55b86d1 --- /dev/null +++ b/test/data/dream/4bins13window1error.gff.out @@ -0,0 +1,1107 @@ +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_0.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_0.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 4 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_1.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_1.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_3_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_3_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 1 query sequence. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 1 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_2.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_2.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 2 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 2 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_3.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_3.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_8.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_8.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_4.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_4.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_1_9.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_1_9.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr2. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr2 + chr2, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_6.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_6.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_0_5.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_0_5.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr1. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr1 + chr1, complement + +# Eps-matches : 3 + + +I/O options: + database file : ref.fasta + query file : tmp/valik/my_dir/query_2_7.fasta + alphabet : dna5 + output file : tmp/valik/my_dir/query_2_7.fasta.gff + output format : gff + +User specified parameters: + minimal match length : 50 + maximal error rate (epsilon) : 0.02 = (1/50) + maximal x-drop : 5 + search forward strand : yes + search reverse complement : yes + + verification strategy : exact + maximal number of matches : 50 + duplicate removal every : 500 + +Calculated parameters: + k-mer length : 25 + s^min : 25 + threshold : 1 + distance cut : 25 + delta : 16 + overlap : 0 + +Loaded 3 query sequences. +Loaded sequence chr3. + +All matches resulting from your search have an E-value of: + 1.48039e-19 or smaller (match score = 1, error penalty = -2) + +Constructing index... + +Aligning all query sequences to database sequence... + chr3 + chr3, complement + +# Eps-matches : 3 + + diff --git a/test/data/dream/4bins15window.ibf b/test/data/dream/4bins15window.ibf new file mode 100644 index 0000000000000000000000000000000000000000..2be7edb54feb5d59dfa3fa93e8d06df7d4d8510b GIT binary patch literal 32891 zcmb`{YpSHx5`^Ju5pUpS2F}1g4nU{UKkNah19WCy*!8sVLa&#r*{u7&wcukPM=;Ks@JCv?L)5)>!*Ws_Q9b#?ECk< z_==sUUq4;z^yaUA?=6=D^^4}H(|4}-*N6Q2%_*KEJohXvS0Bph-s+IvJL3BARJUj6)kkMBzK@)R^7xldo!)$?5AD%=&4KdxZjqnfJRCN^ zKKnyHyt(}P^sx7;)AOtQez{^x#p_F z&ZAT3hkWMIsoM+JZ(YB+t1bu9ozOk@UHa}}4&;*u)uB0jMLPAxPj!Dka{9i#=fuq` zx`)4W=;^$Rz4W2J96tHWr!NQ6L-kwqj`pzEt-m^X>h@as-lO;N$yqx6&Q)(td3C*C zt~$Scp#Amb>f>~H^>}skkPa_;@9N~LLw@Z-4iEt=DIsz4>0n?qy#&ovXgk z-u7!ApFX5>kDMZZ(H(N+^1DNw-#mSK{S&_GGjHXtzWeO&&Z1m8sO~3+HP;A zbIa-Y*B<7{Zw`O4dHQmoI+Wjc!0l5#4&~tdxV}7nxc1YRQBsEhkWwPTY5fttLq$lnX9iZzera+`Sf9P@$RDzPky?E{vOS#&U^X}`Z!dVhZnt* zy&;`>bn2a>-g)xsU;5VT>pt!CFRb3aYtQQC(&Otr^ozc)9Da5B&ZpOh-bD_byxz$N z`S8vyuTH+YefXMBr;jgm$LhB(7k0k9qC3=AZu83P!+jT=&z=kEyO(|Si}bz+UftsM zhx||-sy9y`dKdTDL%*o+ z9=_`IZ}(U^%{9OEayqDQ4pg@zZMx$5gKb5^b#dq8>R=0W*5Y+a7N zxpeyG(v_>Pdwd7Jr87@nxg7Q8*}r~1s9%)Zx;d@m-pl;X(U*hstK<6iX`ilhTDK3> zmx~vx@Aooy0Wn>h8tmGMV@UOmo>0tG^`|UyB96I~a)2Y`_rw&(Lzi40c+Ap_{IsA0$bWk7C;jrKTRlj|p zT-f~XfkVC%E`8s%y7ukeJI*w>>D0RqKcs{9v=1Flw~!t#U+eyE`0SzH-%b7Y zq0^VA|6fsV{jJ-Z&ilJVy~sz`+@){5_u|d77i`Wg_s-Ru+d8h#k9&vi%jbP?=nnnn z(G}@n`?y@3-oEPgXq_I?7u^lz(Uq&iQ=bm@u6~d1i<`6dln?3n)uFjKG=~q$v;Xqb z)A2!l^NMu#Xn%cYy0w>m=!$&iw{LH#kMlu&Se-hwpFPz34sz(}ybFK(bdV031NqlI z&B2TE%+WvP(m{9QP!6PnbmpsjFMZs;^3?fpeg1`fusZWvw7YAd_2s=+&cgcF-szA)jUw`+)>D|Q#<(gwY$89Td~#s@^~v*2^vyfn*<3mHXkWe9Ipy@-+rDu5_}4z(fv$Oc z@?X7^PmVcI9v#$&>h#^y`_M1sTYYu&aA-bW?48Zg@4o8I!>eyDZaFzt5hkO% zPrf}Mee>1%aLC`jx%%|>#o=w8&b;Q!gZ5Zl9lomPvxoPypLeITKThAeJ>=3?httzR zef$4xm`*iQ>aoD}&Li4Lrr-O2FSbh2O@fW*~e(TNc`;^Op_G~Vne0_C( zNQcwor{6VQQSQpq?;SX7u6U8`#w8z?4UA`P# zjymo;yW>=^-(HZv*gf<+cjc(N)82BRJ`OvF4${G;*XN_d;kvi)S1!Nx=IN{BMS1qZ z>H3cDf%LHT&Nm0@Lpck5=l1QZE`QbOg>>+w>pRk)?v`&)K6QRvzqsbpwclKQ zbL`XaEzkSox481u&2OH0(0=w_etLQI_zBlO%SUe?=)Ko`eYy6Qr_QghZcg?33p=+u zdFGi@ADuc~^YG^Jw~xc>)S+BDd*F24lTW|M58E%N^S#=)mpV?*57q6>2kF{x-JGS9 z2hFWdoer9(?_F`ah5T^o?aprXs@L=clqrDSI+XcUcddS zTfBPj(|Y}#>-Pus<<*bV!B^+fo74AKH?P>fJUaI*t`2(#E~oqPolwq6*S`DsA>Hlo zkd67ei7cT$}`eNmsUIqH1wqO&hfCl{};b#wS2o$m$Jp*_rL9v!T{`_rj+AALxN7rQro zbM4JJKcfPdoP?1hjg8f%coyR2j$YMyF%1&W{(}-96N+$L&wY-}!WW zP@T_xINjR6bw2w*I`ib};|uBB1|{=y4I^-@8ezU-5mTjkG^P*db#>7^1I7^>khh=@15J1xBTYT zf6|-7k3;_6i7y{Noxk6co{rx=a@3*yaL5PM>Ak1AeD#yAdo~B}y>eTp*N5ix4thBY z>Ez*Zad+eNP~AJ(2kIB)LVdYVu0C`Z4%MN0v3FL#`u1|4{p#;Ly!W;)*PQ06+mF6F zxp2+Zm**aQ<=aDF&WYO-^4U+WKBQ}39qPl@aeG2_cjF7alfCio%fIUU-Gg2(UGr9* zuDR;4_nz|XBiEkIYh53j)A{Y=3p+/dev/null + + sed -i "s/^>.*$/>chr$i/g" $chr_out + let i=i+1 + + #----------- Sample reads from reference sequence ----------- + echo "Generating $read_count reads of length $read_length with error rate $error_rate" + generate_local_matches \ + --output $read_dir \ + --max-error-rate $error_rate \ + --num-matches $read_count \ + --min-match-length $read_length \ + --max-match-length $read_length \ + --verbose-ids \ + --reverse \ + --ref-len $length \ + --seed $SEED \ + $chr_out +done + +cat chr*.fasta > ref.fasta +rm chr*.fasta + +cat $read_dir/chr*.fastq > query.fastq +rm -r $read_dir diff --git a/test/data/dream/cli_test_output.sh b/test/data/dream/cli_test_output.sh new file mode 100755 index 00000000..6c3c450e --- /dev/null +++ b/test/data/dream/cli_test_output.sh @@ -0,0 +1,56 @@ +#!/bin/bash +cd dream +set -Eeuo pipefail + +if [ -z "${VALIK_TMP}" ]; then + echo "no VALIK_TMP folder given" + exit 127 +fi + +mkdir -p $VALIK_TMP + +#----------- Index and search the reference genome ----------- + +# Split parameters +seg_overlap="150" # how much adjacent segments overlap + +# Build parameters +k=13 +ibf_size="32k" + +# Search parameters +pattern=50 # min local match length +pat_overlap=49 # how much adjacent patterns overlap + +ref_input="ref.fasta" +query="query.fastq" +for b in 4 16 +do + echo "Splitting the genome into $b segments that overlap by $seg_overlap" + ref_meta="ref_meta.txt" + seg_meta="seg_meta"$seg_overlap"overlap"$b"bins.txt" + valik split "$ref_input" --overlap "$seg_overlap" --bins "$b" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + + for w in 13 15 + do + echo "Creating IBF for w=$w and k=$k where segments overlap by $seg_overlap" + index=$b"bins"$w"window.ibf" + valik build "$ref_input" --kmer "$k" --window "$w" --size "$ibf_size" --output "$index" --from-segments --ref-meta "$ref_meta" --seg-meta "$seg_meta" + + for e in 1 + do + echo "Searching IBF with $e errors" + dist_out=$b"bins"$w"window"$e"error.gff" + local_out="local"$b"bins"$w"window"$e"error.gff" + valik search --index "$index" --query "$query" --output "$dist_out" --error "$e" --pattern "$pattern" --overlap "$pat_overlap" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + #valik search --shared-memory --index "$index" --query "$query" --output "$local_out" --error "$e" --pattern "$pattern" --overlap "$pat_overlap" --ref-meta "$ref_meta" --seg-meta "$seg_meta" + done + + rm $VALIK_TMP/* + done +done + +#stellar_out="stellar.gff" +#stellar ref.fasta query.fasta -e 0.02 -l 50 -o $stellar_out + +rm -r $VALIK_TMP diff --git a/test/data/dream/dummy_reads.fastq b/test/data/dream/dummy_reads.fastq new file mode 100644 index 00000000..292cc6e8 --- /dev/null +++ b/test/data/dream/dummy_reads.fastq @@ -0,0 +1,24 @@ +@0 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TAATATATATATATAATATATATATATATATATATATATATATATATATATATATATATATAATATATATATATATAATATACCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII \ No newline at end of file diff --git a/test/data/dream/query.fastq b/test/data/dream/query.fastq new file mode 100644 index 00000000..032e427f --- /dev/null +++ b/test/data/dream/query.fastq @@ -0,0 +1,120 @@ +@0 reverse,start_position=558,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TGTTGTGGGAGGCTGGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGCACATAGTGTATGGAAAGTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATTTAAGTTGTATCGTGGCTATGAGGGTAGTCGCCGGAGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=10,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +AGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAACCCAAGTGAGTTCACGGGCTCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCACAGGATCTCACGTGCGGACAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=653,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GGCCTGACCTAACCTGTATAAGCTGTATCGTGGCTATGAGGGTAGTCGCCGGAGAAAACGTATGCTAACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAGCCGGGCCTAGGGGTTCACCGTAACGGATTAGTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=282,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGCTTCATGGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGAGAGGACGCCCAATTCTGCCAAGGATGATTTAGGGTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=611,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CGGACATAGTGTATGGAAAGTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTGTCGTAGCTATGAGGGTAGTCGCGGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=379,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CTACTCTGGATCAATTTCCATACACTATGTCCGTGGTAGCCTGGATCACAGCTCGCGCGCTGCTTAAGACCGAGCCTCCCACAACAGGCGTAAGAGGTGTAATGGTTGACCACCCTTTTTCAGTGAGAGTCATACGATTGCGGTGGGGTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=752,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +GGCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACCCCCGCCGTGTTAGGGGAAGCCAAATGCACGTACAAGCATTCCTCCACGATGACTGACCTAAGAGATGTCCGCCCGTGAGATCCTGTGCTTTGCGCTGGTCGAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=318,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CCGGCGACTACCCTCATAGCCACGATACAACTTATATAGGTTAGGTCAGGCCCCCGCGGGTCTACTCTGGATCACTTTCCATACACTATGTCCGTGGTAGCCTGGATCAGAGCTGGCGCGCTTCTTAAGACCGAGCCTCCCACAACAGGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=693,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +CTCGCCCATTTGATCGTTGCGGTCATTCGTAGAGAGCGTGAAATGCGCTATGCTCTTCGGTCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACGCCCGGCGTGTTAGGGGAAGCCAAAAGAACGTACAAGCATTCCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=121,length=150,errors=3,reference_id='chr1',reference_file='chr1.fasta' +TACCTGGTAAACAACCACGCCTGCGAAAACAGATGTAGGCCCGCAGCGGAGGGGTGACGACTTGAGTTCTATCAGGAAATCATCGCTGGATTTGAATTTGACTAATCCGTTACGGTGAACCCCTAGGCCCGGCTAGCTTACAACCGATCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@0 reverse,start_position=1374,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +ACGGGAGCCTAGGCAATCCCGACGTCCCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTCGCAGCTTTACAGTTCTTCTCCGCGGACGGGCAGAGTGGTTTTAAGACCAGGGTCTATGCACAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=839,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AGGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTCATCTCTATTAGTCACGGGCAGCATGGTGTCAGCGAACCGCGCGTCTCCTAATTTCTGGTCTACCGATTTAGCCCCGGCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=965,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +TGGTCTAACGATTTAACCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTAACTAAAAACGCCACGGACCTGGCTAACCTTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=1140,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GGGTGGTAAACATAGATTGTATATAGTCAACGACATACACTCATTATTTTGCAATTGCGGCATCTCTACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGCGCTACGCTGGTTTGGAAGACACTTAGTATC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=93,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +ACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGAACAGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTCCTTTTGGCTTCCCCTAACACGGCGGGCGTCTCCCGTACGTATCCT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=1051,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AAGATCGTTTTCGCCATCCGGGAAAACTAATGAAGACATAGTTGAGATGCCGCAATTGCATAATAATGAGTGTCTGTCGTTGACTATATAGAATCTATGTCTACCACCCACATAATACTCTGGCAGTATGGGGAATCCGAAGCGCCCTCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=197,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GCATCGGAGTTCGTGGGAGGGTCCACAGTCTTAACAGGAAGTAGCGATTCTGATCCACGTACCTGCGTAAGGTCTAGCAAGATCCTTAGGCCGATGAGGAAGGTTGTGAGTTTTAAATCCAGGGGTATAACCCCTACTACCACTGCTGCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=1023,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +TTCCAAACCAGCGTAGCGCTTCTCCTGTAAGATCGTTTTCGCCAGCCGGGAAAACTAATTAAGACACAGTTGAGATGCCGCAATTGCATAATAATGAGTGTATGTCGTTGACTATATAGAATCCATGTTTACCACCCACATAATACTCTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=1642,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +AGGCCCCCGCGGGTCTACTCTGGATCACTTTCCATACACTATGTCGGTGGTAGCCTGGATCACAGCTCGCGCGCTGCTTAAGACCGAGCCTCCCACAACAGGCATAAGATGTGTAATGTTTGACCACCCTTTTTCAGCGAGAGTCATACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=2014,length=150,errors=3,reference_id='chr2',reference_file='chr2.fasta' +GCGCTATGTTCTTCGGGCCTAGGGGCTTAAGGGGTGTACCGACAGGATACGTACCGGAGACGCCCGCCGTGTTAGGGGAAGCCAAAAGAACGTACAAGCATTCCTCCACGGTGACTGACCTAAGAGATGTCCGCCCGTGAGATCCTGTGC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@0 reverse,start_position=1841,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +GCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTCATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTCGAAGACGGACGAGGTATGCAGACATAGCAGCAGTGGTAGTAGGGGTTAAACCCCTGGATTTAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@1 reverse,start_position=1124,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TTCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTATGAAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGCGCTACTCTGGTTTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@2 reverse,start_position=2153,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +AGCTGAGCGGTTCAGGCAGAGTCACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCTAACAGCTAAGGATTCCTGGTCCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCCCAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@3 reverse,start_position=2146,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +CCTTGAAAGCTGAGCGGTTCAGACAGAGTAACTACATCTTATATGTAATCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGTCCCTGGCACGGATATAGATCACAATCAGGAATTCCCTCCTAAGTGCCCGCCCGGTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@4 reverse,start_position=857,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TGGGGGCCTACATCTGTTTTCGCAGGCGTGGGTGTTTACCAGGTATGGTGCTCATCTCTATTAGTCACCGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTAGCCCCGGCAAATAACTTTGGATTGTGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@5 reverse,start_position=1106,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TTTTTCTTACGCCGAACACTAAACCGCCACGCATGACACTGACTTTCCAGGTGATCTCCGAGTTCAATCCGAGCATGCAGTGCGTCTTTCCAGTGTGAGACGGTCATAACTGTACGGAAAAGGCTTACCTTGATAGATGGGAAGAGTAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@6 reverse,start_position=2867,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +GAGATGTCCGCCCGTGAGATCCTGTGCTTTGCGCTGGTCGAGTGAGTGTGCACCATCCTCCGATGTCTGTCTAGCGCCCGTGAACATACGTGGGTTGGCACCCAACCGCAGGATCTGATGTACCTTTGCATGCAGAGCTTATGCTTCCAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@7 reverse,start_position=1280,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TCCAGTTAGTGCGCGACACTTTTCCCTGATTGGCAGTTCGGCTTATCACTATTCGTCAAGAATCGGCACATTTGATTCTCCCGCGCCATAAGACTGGGTTTTCCAAGTCTACTATGGAGTAGGTACATCGGAGGTTGTCCGCTCAGCGGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@8 reverse,start_position=1878,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +TGTTTACCACCCACATAATACTCTGGCAGTATGGGGAATCCGAAGCGCCCTCCAATGTTATAGTCAACAACGAATGTTAGCCAGGTCCGTGGCGTTTTTAGTTAGGAGCCGCACGGCAGTGCCCGTCAGTTCTGGCACTCTCCAACCACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@9 reverse,start_position=399,length=150,errors=3,reference_id='chr3',reference_file='chr3.fasta' +AACTTCAGCAACACTAATAGGCACCATTGGCAATGCATCGGTGCCCAGACTAGTTTCACTGTGGATCCTGTATCATTCGCCGTCGCGTCAAAAGTCGTTTATAACCGACCCATAACTATGGTGCTTAGACCGGACGACGCCGGGATCAAT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII diff --git a/test/data/dream/ref.fasta b/test/data/dream/ref.fasta new file mode 100644 index 00000000..7fee5ae9 --- /dev/null +++ b/test/data/dream/ref.fasta @@ -0,0 +1,95 @@ +>chr1 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAA +>chr2 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTA +ACTAAAAACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCC +ATACTGCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTAT +GCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGC +GCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGGTCAACGGGCGCCGTCAA +AGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATCCCGACGTC +CCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTA +CAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTA +CTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGC +GGACAACCTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAA +TGTGCCGATTCTTGACGAATAGTTCTAAGCCGAACTGCCAATCAGGGAAAAGTGTCGCGCACTAACTGGA +GCTGAAACCGCCAATAGTGTCTAAGTTACTCTTCCCATCTATCAAGGTAAGCCTTTTCCGTACAGTTATG +ACCATCTCACACTGGAAAGACGCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTC +ATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTGGAAGACGGACGAGGTATGCAGACATTGCAGCA +GTGGTAGTAGGGGTTATACCCCTGGATTTAAAACTCACAACCTTCCTCATAGGCCTAAGGATCTTGCTAG +ACCTTAAGCAGGTACGTGGATCAGAATCGCTACTTCCTGTTAAGACTGTGGACCCTCCCACAAACTCCGA +TGCGAGCTAGGACGTCTTTAGCTCAGCTTGAGAATACTCCTATTTGCCTTGAAAGCTGAGCGGTTCAGAC +AGAGTAACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGT +CCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCC +>chr3 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTA +ACTAAAAACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCC +ATACTGCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTAT +GCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGC +GCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGGTCAACGGGCGCCGTCAA +AGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATCCCGACGTC +CCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTA +CAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTA +CTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGC +GGACAACCTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAA +TGTGCCGATTCTTGACGAATAGTTCTAAGCCGAACTGCCAATCAGGGAAAAGTGTCGCGCACTAACTGGA +GCTGAAACCGCCAATAGTGTCTAAGTTACTCTTCCCATCTATCAAGGTAAGCCTTTTCCGTACAGTTATG +ACCATCTCACACTGGAAAGACGCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTC +ATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTGGAAGACGGACGAGGTATGCAGACATTGCAGCA +GTGGTAGTAGGGGTTATACCCCTGGATTTAAAACTCACAACCTTCCTCATAGGCCTAAGGATCTTGCTAG +ACCTTAAGCAGGTACGTGGATCAGAATCGCTACTTCCTGTTAAGACTGTGGACCCTCCCACAAACTCCGA +TGCGAGCTAGGACGTCTTTAGCTCAGCTTGAGAATACTCCTATTTGCCTTGAAAGCTGAGCGGTTCAGAC +AGAGTAACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGT +CCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCCCACACTCTGT +GAGACTACGTGCGCGTGTAGTATCGTGAGGTCCGCGGTGGAAAAGGGTTTGGCACTTACTACTCAGTGAC +CGTATACACGGAGATTCGCACTGATGTGGAATATGAAATCCCACATCCCCTGAGAATTTCGAATCTGAGG +ATGAGTATATGCCTCGATGTAGGCCAGGAGCATTGATCCCGGCGTCGTCCGGTCTAAGCACCATAGTTAT +GGGTCGGTTATAAACGAATTTTGACGCGACGGGGAATGATACAGGATCCACAGTGAAACTAGTCTGGGCA +CCGATGCATTGCCAATGGTGCCTATTAGTGTTCCTGAAGTTGACTACAGTCCGTACCTCAGTATAGCGCT +GGTTACTAGTAGCGAAGTTGAGATTGTAGCTCGTACTCCAATGACCACCCGAGGGGGTGGTGCAATGTGC +AGGTAGGGGTAGGTTCCTGTAGTTCGGAGGTCAACCTCTTGTTGACGTCTGATGCGAGCCTGACTAAAAT +GCGCTTCTTCACTTTTGTTCGTATAGTCACTATATTCGCGAAACCGTCGCTTTTATTATAGACGGCCTAC +TTCTTTGACCGAGCCTCATAGTCTGCACTCGGGACGAAACTAACGGCTGTTCCACTCATGACCTACGCGC +CTGAGTGATCAAATAATCAAAAGAATGCGCCGCTATATGTAGGGGGCCCATGTATTGGCTGACTTTGAAA +ACACTCTGACACGAACTTGA diff --git a/test/data/dream/ref_meta.txt b/test/data/dream/ref_meta.txt new file mode 100644 index 00000000..5dbfaa67 --- /dev/null +++ b/test/data/dream/ref_meta.txt @@ -0,0 +1,3 @@ +chr1 0 1023 +chr2 1 2300 +chr3 2 3030 diff --git a/test/data/dream/seg_meta150overlap16bins.txt b/test/data/dream/seg_meta150overlap16bins.txt new file mode 100644 index 00000000..9776fd3d --- /dev/null +++ b/test/data/dream/seg_meta150overlap16bins.txt @@ -0,0 +1,16 @@ +0 0 0 492 +1 0 342 492 +2 0 684 339 +3 1 0 534 +4 1 384 534 +5 1 768 534 +6 1 1152 534 +7 1 1536 534 +8 1 1920 380 +9 2 0 583 +10 2 433 583 +11 2 866 583 +12 2 1299 583 +13 2 1732 583 +14 2 2165 583 +15 2 2598 432 diff --git a/test/data/dream/seg_meta150overlap4bins.txt b/test/data/dream/seg_meta150overlap4bins.txt new file mode 100644 index 00000000..df37c68b --- /dev/null +++ b/test/data/dream/seg_meta150overlap4bins.txt @@ -0,0 +1,4 @@ +0 0 0 1023 +1 1 0 2300 +2 2 0 1666 +3 2 1516 1514 diff --git a/test/data/simulate_input.sh b/test/data/simulate_input.sh index 5caeb858..dab9acc1 100755 --- a/test/data/simulate_input.sh +++ b/test/data/simulate_input.sh @@ -39,4 +39,6 @@ done ./search/cli_test_input.sh $SEED $BIN_NUMBER $HAPLOTYPE_COUNT +./dream/cli_test_input.sh $SEED + ./consolidate/cli_test_input.sh diff --git a/test/data/split/api_test_input.sh b/test/data/split/api_test_input.sh index dbd365a5..e9624d0c 100755 --- a/test/data/split/api_test_input.sh +++ b/test/data/split/api_test_input.sh @@ -26,6 +26,6 @@ do cat chr*.fasta > ${out_dir}/ref.fasta rm chr*.fasta - valik split ${out_dir}/ref.fasta --overlap ${overlap} --bins ${bins} --reference-output ${out_dir}/reference_metadata.txt --segment-output ${out_dir}/reference_segments.txt + valik split ${out_dir}/ref.fasta --overlap ${overlap} --bins ${bins} --ref-meta ${out_dir}/reference_metadata.txt --seg-meta ${out_dir}/reference_segments.txt done done diff --git a/test/data/split/cli_test_input.sh b/test/data/split/cli_test_input.sh index 9870ea5c..ffb63c9d 100755 --- a/test/data/split/cli_test_input.sh +++ b/test/data/split/cli_test_input.sh @@ -43,6 +43,7 @@ generate_local_matches \ --min-match-length $read_length \ --max-match-length $read_length \ --verbose-ids \ + --ref-len $ref_len \ --seed $SEED \ $ref_out diff --git a/test/data/update_datasources.sh b/test/data/update_datasources.sh index 34330d81..5b3bc1f8 100755 --- a/test/data/update_datasources.sh +++ b/test/data/update_datasources.sh @@ -118,3 +118,20 @@ do echo -n $sha >> ../datasources.cmake echo ")" >> ../datasources.cmake done + +echo -e "\n" >> ../datasources.cmake + +cd ../dream + +for file in * +do + [[ -d $file ]] && continue # skip folders + [[ $file == *.sh ]] && continue + echo -n "declare_datasource (FILE ${file} + URL \${CMAKE_SOURCE_DIR}/test/data/dream/${file} + URL_HASH SHA256=" >> ../datasources.cmake + + sha=($(shasum -a 256 $file)) + echo -n $sha >> ../datasources.cmake + echo ")" >> ../datasources.cmake +done \ No newline at end of file