diff --git a/include/encoders/dense_encoders.hpp b/include/encoders/dense_encoders.hpp index 3fbf510..8825e84 100644 --- a/include/encoders/dense_encoders.hpp +++ b/include/encoders/dense_encoders.hpp @@ -50,7 +50,7 @@ struct diff { }; template -struct mono_interleaved { +struct dense_mono { template void encode(Iterator begin, // const uint64_t num_partitions, // @@ -89,7 +89,7 @@ struct mono_interleaved { }; template -struct multi_interleaved { +struct dense_interleaved { template void encode(Iterator begin, // const uint64_t num_partitions, // @@ -127,7 +127,7 @@ struct multi_interleaved { } static std::string name() { - return "multi-" + Encoder::name(); + return "inter-" + Encoder::name(); } inline uint64_t access(const uint64_t partition, const uint64_t bucket) const { @@ -151,7 +151,7 @@ struct multi_interleaved { }; template -struct dual_interleaved { +struct dense_dual { template void encode(Iterator begin, // const uint64_t num_partitions, // @@ -207,19 +207,19 @@ struct dual_interleaved { Back m_back; }; -typedef mono_interleaved mono_R; -typedef multi_interleaved multi_R; -typedef mono_interleaved mono_C; -typedef multi_interleaved multi_C; -typedef mono_interleaved mono_D; -typedef multi_interleaved multi_D; -typedef mono_interleaved mono_EF; -typedef multi_interleaved multi_EF; +typedef dense_mono mono_R; +typedef dense_interleaved inter_R; +typedef dense_mono mono_C; +typedef dense_interleaved inter_C; +typedef dense_mono mono_D; +typedef dense_interleaved inter_D; +typedef dense_mono mono_EF; +typedef dense_interleaved inter_EF; /* dual_interleaved encoders */ -typedef dual_interleaved mono_C_mono_R; -typedef dual_interleaved multi_C_multi_R; -typedef dual_interleaved mono_D_mono_R; -typedef dual_interleaved multi_D_multi_R; +typedef dense_dual mono_C_mono_R; +typedef dense_dual inter_C_inter_R; +typedef dense_dual mono_D_mono_R; +typedef dense_dual inter_D_inter_R; } // namespace pthash \ No newline at end of file diff --git a/src/build.cpp b/src/build.cpp index fc0e0ca..8408c45 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -152,7 +152,7 @@ void choose_dual_encoder_tradeoff(build_parameters const& params, if (tradeoff == uint64_t(std::round(params.dual_encoder_tradeoff * granularity))) { choose_needs_free_array< Builder, Iterator, search_type, - dual_interleaved>(builder, timings, + dense_dual>(builder, timings, params, config); } if constexpr (tradeoff > 0) { @@ -232,48 +232,48 @@ void choose_encoder(build_parameters const& params, build_configuratio choose_needs_free_array(builder, timings, params, config); } - if (encode_all or params.encoder_type == "multi-R") { - choose_needs_free_array(builder, timings, + if (encode_all or params.encoder_type == "inter-R") { + choose_needs_free_array(builder, timings, params, config); } if (encode_all or params.encoder_type == "mono-C") { choose_needs_free_array(builder, timings, params, config); } - if (encode_all or params.encoder_type == "multi-C") { - choose_needs_free_array(builder, timings, + if (encode_all or params.encoder_type == "inter-C") { + choose_needs_free_array(builder, timings, params, config); } if (encode_all or params.encoder_type == "mono-D") { choose_needs_free_array(builder, timings, params, config); } - if (encode_all or params.encoder_type == "multi-D") { - choose_needs_free_array(builder, timings, + if (encode_all or params.encoder_type == "inter-D") { + choose_needs_free_array(builder, timings, params, config); } if (encode_all or params.encoder_type == "mono-EF") { choose_needs_free_array(builder, timings, params, config); } - if (encode_all or params.encoder_type == "multi-EF") { - choose_needs_free_array(builder, timings, + if (encode_all or params.encoder_type == "inter-EF") { + choose_needs_free_array(builder, timings, params, config); } if (encode_all or params.encoder_type == "mono-C-mono-R") { choose_dual_encoder_tradeoff(params, config, builder, timings); } - if (encode_all or params.encoder_type == "multi-C-multi-R") { - choose_dual_encoder_tradeoff(params, config, + if (encode_all or params.encoder_type == "inter-C-inter-R") { + choose_dual_encoder_tradeoff(params, config, builder, timings); } if (encode_all or params.encoder_type == "mono-D-mono-R") { choose_dual_encoder_tradeoff(params, config, builder, timings); } - if (encode_all or params.encoder_type == "multi-D-multi-R") { - choose_dual_encoder_tradeoff(params, config, + if (encode_all or params.encoder_type == "inter-D-inter-R") { + choose_dual_encoder_tradeoff(params, config, builder, timings); } @@ -356,8 +356,8 @@ void build(cmd_line_parser::parser const& parser, Iterator keys, uint64_t num_ke /* only for dense partitioning */ "mono-R", "mono-C", "mono-D", "mono-EF", // mono - "multi-R", "multi-C", "multi-D", "multi-EF", // multi - "mono-C-mono-R", "multi-C-multi-R", "mono-D-mono-R", "multi-D-multi-R", // dual + "inter-R", "inter-C", "inter-D", "inter-EF", // inter + "mono-C-mono-R", "inter-C-inter-R", "mono-D-mono-R", "inter-D-inter-R", // dual /**/ "all" // @@ -436,8 +436,8 @@ int main(int argc, char** argv) { "The encoder type. Possibile values are: " "'R-R', 'PC', 'D-D', 'EF', " "'mono-R', 'mono-C', 'mono-D', 'mono-EF', " - "'multi-R', 'multi-C', 'multi-D', 'multi-EF', " - "'mono-C-mono-R', 'multi-C-multi-R', 'mono-D-mono-R', 'multi-D-multi-R', " + "'inter-R', 'inter-C', 'inter-D', 'inter-EF', " + "'mono-C-mono-R', 'inter-C-inter-R', 'mono-D-mono-R', 'inter-D-inter-R', " "'all'.\n\t" "The 'all' type will just benchmark all encoders. (Useful for benchmarking " "purposes.)", @@ -481,11 +481,8 @@ int main(int argc, char** argv) { input.close(); } build(parser, keys.begin(), keys.size()); - } else { // use num_keys random 64-bit keys - std::vector keys; - keys.reserve(num_keys); - for (size_t i = 0; i < num_keys; ++i) { keys.push_back(std::to_string(i)); } - build(parser, keys.begin(), keys.size()); + } else { // use num_keys random strings + build(parser, generateBenchmarkInput(num_keys).begin(), num_keys); } return 0; diff --git a/src/util.hpp b/src/util.hpp index 461dba8..12e2dea 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -11,6 +11,10 @@ #include "utils/util.hpp" #include "essentials.hpp" +#include +#include +#include +#include namespace pthash { @@ -162,6 +166,69 @@ std::vector distinct_keys(uint64_t num_keys, uint64_t seed = constants::in return keys; } + +class XorShift64 { +private: + uint64_t x64; +public: + explicit XorShift64(uint64_t seed = 88172645463325252ull) : x64(seed) { + } + + inline uint64_t operator()() { + x64 ^= x64 << 13; + x64 ^= x64 >> 7; + x64 ^= x64 << 17; + return x64; + } + + inline uint64_t operator()(uint64_t range) { +#ifdef __SIZEOF_INT128__ // then we know we have a 128-bit int + return (uint64_t)(((__uint128_t)operator()() * (__uint128_t)range) >> 64); +#elif defined(_MSC_VER) && defined(_WIN64) + // supported in Visual Studio 2005 and better + uint64_t highProduct; + _umul128(operator()(), range, &highProduct); // ignore output + return highProduct; + unsigned __int64 _umul128( + unsigned __int64 Multiplier, + unsigned __int64 Multiplicand, + unsigned __int64 *HighProduct + ); +#else + return word / (UINT64_MAX / p); // fallback +#endif // __SIZEOF_INT128__ + } +}; + +std::vector generateBenchmarkInput(size_t n) { + std::vector inputData; + inputData.reserve(n); + auto time = std::chrono::system_clock::now(); + long constructionTime = std::chrono::duration_cast(time.time_since_epoch()).count(); + XorShift64 prng(constructionTime); + std::cout<<"Generating input"< bool check(Iterator keys, Function const& f) { __uint128_t n = f.num_keys();