Make the number of MPHF construction threads configurable.

* external/pthash: move the submodule pointer from 28aedcb to 09b9bec.
* include/util.hpp: build_configuration gains a num_threads member,
  initialised to std::thread::hardware_concurrency(); <thread> is included
  for it.
* include/minimizers.hpp: the MPHF configuration takes its thread count from
  build_config.num_threads instead of querying the hardware directly. A value
  of 0 is clamped to 1, because num_partitions is computed as 4 * num_threads
  and then used as a divisor. Building is refused with std::runtime_error
  when size <= 1.

diff --git a/external/pthash b/external/pthash
index 28aedcb..09b9bec 160000
--- a/external/pthash
+++ b/external/pthash
@@ -1 +1 @@
-Subproject commit 28aedcb03af096c0e9988b1ad240da7b4cf010d7
+Subproject commit 09b9bec3a9027b47c131061ec6a485454a698bcc
diff --git a/include/minimizers.hpp b/include/minimizers.hpp
index f58cb90..937c4db 100644
--- a/include/minimizers.hpp
+++ b/include/minimizers.hpp
@@ -13,7 +13,7 @@ struct minimizers {
         mphf_config.seed = util::get_seed_for_hash_function(build_config);
         mphf_config.minimal_output = true;
         mphf_config.verbose_output = false;
-        mphf_config.num_threads = std::thread::hardware_concurrency();
+        mphf_config.num_threads = build_config.num_threads > 0 ? build_config.num_threads : 1;
         mphf_config.num_partitions = 4 * mphf_config.num_threads;
 
         if (size / mphf_config.num_partitions < pthash::constants::min_partition_size) {
@@ -29,6 +29,10 @@ struct minimizers {
 
         mphf_config.ram = 4 * essentials::GB;
         mphf_config.tmp_dir = build_config.tmp_dirname;
+        if (size <= 1) {
+            throw std::runtime_error(
+                "each partition must contain more than one key: use less partitions");
+        }
         m_mphf.build_in_external_memory(begin, size, mphf_config);
     }
 
@@ -49,4 +53,4 @@ struct minimizers {
     minimizers_pthash_type m_mphf;
 };
 
-} // namespace sshash
\ No newline at end of file
+} // namespace sshash
diff --git a/include/util.hpp b/include/util.hpp
index 0667fda..3cc1218 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <thread>
 #include <cmath> // for std::ceil on linux
 
 #include "hash_util.hpp"
@@ -74,6 +75,7 @@ struct build_configuration {
         : k(31)
         , m(17)
         , seed(constants::seed)
+        , num_threads(std::thread::hardware_concurrency())
 
         , l(constants::min_l)
         , c(constants::c)
@@ -87,6 +89,7 @@ struct build_configuration {
     uint64_t k; // kmer size
     uint64_t m; // minimizer size
     uint64_t seed;
+    uint64_t num_threads;
 
     uint64_t l; // drive dictionary trade-off
     double c; // drive PTHash trade-off
@@ -302,4 +305,4 @@ struct buffered_lines_iterator {
     uint64_t m_read_chars;
 };
 
-} // namespace sshash
\ No newline at end of file
+} // namespace sshash