From 5a9c224bdd84514fd8c6901dae23179f42237822 Mon Sep 17 00:00:00 2001 From: Stefan Hermann Date: Sun, 7 Apr 2024 15:00:11 +0200 Subject: [PATCH 1/5] align bucket count per partition to GPU impl --- .../builders/internal_memory_builder_partitioned_phf.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/builders/internal_memory_builder_partitioned_phf.hpp b/include/builders/internal_memory_builder_partitioned_phf.hpp index 670408d..30fdb5c 100644 --- a/include/builders/internal_memory_builder_partitioned_phf.hpp +++ b/include/builders/internal_memory_builder_partitioned_phf.hpp @@ -35,6 +35,7 @@ struct internal_memory_builder_partitioned_phf { m_bucketer.init(num_partitions); m_offsets.resize(num_partitions + 1); m_builders.resize(num_partitions); + m_num_buckets_per_partition = compute_num_buckets(config.avg_partition_size, config.lambda); std::vector> partitions(num_partitions); for (auto& partition : partitions) partition.reserve(1.1 * config.avg_partition_size); @@ -72,12 +73,7 @@ struct internal_memory_builder_partitioned_phf { auto partition_config = config; partition_config.seed = m_seed; - - const uint64_t num_buckets_single_phf = compute_num_buckets(num_keys, config.lambda); - const uint64_t num_buckets_per_partition = - std::ceil(static_cast(num_buckets_single_phf) / num_partitions); - m_num_buckets_per_partition = num_buckets_per_partition; - partition_config.num_buckets = num_buckets_per_partition; + partition_config.num_buckets = m_num_buckets_per_partition; if (config.verbose_output) { std::cout << "num_buckets_per_partition = " << partition_config.num_buckets << std::endl; From 0a3b0947356d2e2d8742f8451d937117c2147c35 Mon Sep 17 00:00:00 2001 From: Stefan Hermann Date: Sun, 7 Apr 2024 15:39:45 +0200 Subject: [PATCH 2/5] anything that is independent of the input keys should not count towards the total space consumption --- include/dense_partitioned_phf.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/dense_partitioned_phf.hpp b/include/dense_partitioned_phf.hpp index 28864c5..b193bc5 100644 --- a/include/dense_partitioned_phf.hpp +++ b/include/dense_partitioned_phf.hpp @@ -93,8 +93,7 @@ struct dense_partitioned_phf { } size_t num_bits_for_mapper() const { - return m_partitioner.num_bits() + m_bucketer.num_bits() + m_offsets.num_bits() + - m_free_slots.num_bits(); + return m_offsets.num_bits() + (needsFreeArray ? m_free_slots.num_bits() : 0); } size_t num_bits() const { From cd4b10c22cd038815341b8a4abd4bcf8caddfdd2 Mon Sep 17 00:00:00 2001 From: Stefan Hermann Date: Mon, 8 Apr 2024 08:58:20 +0200 Subject: [PATCH 3/5] corrected space --- include/dense_partitioned_phf.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dense_partitioned_phf.hpp b/include/dense_partitioned_phf.hpp index b193bc5..c80a8c8 100644 --- a/include/dense_partitioned_phf.hpp +++ b/include/dense_partitioned_phf.hpp @@ -93,7 +93,7 @@ struct dense_partitioned_phf { } size_t num_bits_for_mapper() const { - return m_offsets.num_bits() + (needsFreeArray ? m_free_slots.num_bits() : 0); + return m_partitioner.num_bits() + m_bucketer.num_bits() + m_offsets.num_bits() + (needsFreeArray ? m_free_slots.num_bits() : 0); } size_t num_bits() const { From 58d2007793ae9b07453f30d0582d69dfa07d7e07 Mon Sep 17 00:00:00 2001 From: Stefan Hermann Date: Mon, 8 Apr 2024 09:01:21 +0200 Subject: [PATCH 4/5] corrected visitor --- include/dense_partitioned_phf.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dense_partitioned_phf.hpp b/include/dense_partitioned_phf.hpp index c80a8c8..c9b0b3a 100644 --- a/include/dense_partitioned_phf.hpp +++ b/include/dense_partitioned_phf.hpp @@ -117,7 +117,8 @@ struct dense_partitioned_phf { visitor.visit(m_bucketer); visitor.visit(m_pilots); visitor.visit(m_offsets); - visitor.visit(m_free_slots); + if(needsFreeArray) + visitor.visit(m_free_slots); } private: From d4c220f7171f30f1f5ca99c9a3d3a7710d5b6dbe Mon Sep 17 00:00:00 2001 From: Stefan Hermann Date: Mon, 8 Apr 2024 13:37:28 +0200 Subject: [PATCH 5/5] multithreaded encoding --- include/dense_partitioned_phf.hpp | 4 +- include/encoders/dense_encoders.hpp | 57 ++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/include/dense_partitioned_phf.hpp b/include/dense_partitioned_phf.hpp index c9b0b3a..d07fd41 100644 --- a/include/dense_partitioned_phf.hpp +++ b/include/dense_partitioned_phf.hpp @@ -21,7 +21,7 @@ struct dense_partitioned_phf { } template - double build(Builder& builder, build_configuration const& /* config */) // + double build(Builder& builder, build_configuration const& config) // { auto start = clock_type::now(); @@ -40,7 +40,7 @@ struct dense_partitioned_phf { const uint64_t increment = m_table_size / num_partitions; m_offsets.encode(offsets.begin(), offsets.size(), increment); m_pilots.encode(builder.interleaving_pilots_iterator_begin(), num_partitions, - num_buckets_per_partition); + num_buckets_per_partition, config.num_threads); if constexpr (needsFreeArray) { assert(builder.free_slots().size() == m_table_size - m_num_keys); m_free_slots.encode(builder.free_slots().data(), m_table_size - m_num_keys); diff --git a/include/encoders/dense_encoders.hpp b/include/encoders/dense_encoders.hpp index 42756fb..3fbf510 100644 --- a/include/encoders/dense_encoders.hpp +++ b/include/encoders/dense_encoders.hpp @@ -2,6 +2,7 @@ #include #include +#include namespace pthash { @@ -53,7 +54,7 @@ struct mono_interleaved { template void encode(Iterator begin, // const uint64_t num_partitions, // - const uint64_t num_buckets_per_partition) // + const uint64_t num_buckets_per_partition, const uint64_t /*num_threads*/) // { m_num_partitions = num_partitions; m_encoder.encode(begin, num_partitions * num_buckets_per_partition); @@ -92,11 +93,36 @@ struct multi_interleaved { template void encode(Iterator begin, // const uint64_t num_partitions, // - const uint64_t num_buckets_per_partition) // + const uint64_t num_buckets_per_partition, const uint64_t num_threads) // { m_encoders.resize(num_buckets_per_partition); - for (uint64_t i = 0; i != num_buckets_per_partition; ++i) { - m_encoders[i].encode(begin + i * num_partitions, num_partitions); + if(num_threads==1) { + for (uint64_t i = 0; i != num_buckets_per_partition; ++i) { + m_encoders[i].encode(begin + i * num_partitions, num_partitions); + } + } else { + auto exe = [&](uint64_t beginEncoder, uint64_t endEncoder) { + for (; beginEncoder != endEncoder; ++beginEncoder) { + m_encoders[beginEncoder].encode(begin + beginEncoder * num_partitions, num_partitions); + } + }; + + std::vector threads(num_threads); + uint64_t currentEncoder = 0; + uint64_t i = 0; + const uint64_t enc_per_thread = + (num_buckets_per_partition + num_threads - 1) / num_threads; + while (currentEncoder < num_buckets_per_partition) { + uint64_t endEncoder = currentEncoder + enc_per_thread; + if (endEncoder > num_buckets_per_partition) endEncoder = num_buckets_per_partition; + threads[i] = std::thread(exe, currentEncoder, endEncoder); + currentEncoder = endEncoder; + i++; + } + for (auto& t : threads) { + if (t.joinable()) t.join(); + } + } } @@ -129,16 +155,29 @@ struct dual_interleaved { template void encode(Iterator begin, // const uint64_t num_partitions, // - const uint64_t num_buckets_per_partition) // + const uint64_t num_buckets_per_partition, const uint64_t num_threads) // { m_front_size = num_buckets_per_partition * (static_cast(numerator) / denominator); - m_front.encode(begin, num_partitions, m_front_size); - m_back.encode(begin + m_front_size * num_partitions, num_partitions, - num_buckets_per_partition - m_front_size); + if(num_threads == 1) { + if(m_front_size > 0) m_front.encode(begin, num_partitions, m_front_size, 1); + if(num_buckets_per_partition - m_front_size > 0) m_back.encode(begin + m_front_size * num_partitions, num_partitions, + num_buckets_per_partition - m_front_size, 1); + } else { + uint64_t m_front_threads = + (num_threads * m_front_size + num_buckets_per_partition - 1) / + num_buckets_per_partition; + auto exe = [&]() { + if(m_front_size > 0) m_front.encode(begin, num_partitions, m_front_size, m_front_threads); + }; + std::thread frontThread = std::thread(exe); + if(num_buckets_per_partition - m_front_size > 0) m_back.encode(begin + m_front_size * num_partitions, num_partitions, + num_buckets_per_partition - m_front_size, num_threads - m_front_threads); + if (frontThread.joinable()) frontThread.join(); + } } static std::string name() { - return Front::name() + "-" + Back::name(); + return Front::name() + "-" + Back::name() + "-" + std::to_string(static_cast(numerator)/denominator); } size_t num_bits() const {