From 8832c532b8aa11aad6caa91be011dd1a419bc552 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 25 Nov 2024 11:59:48 -0500 Subject: [PATCH] lint --- benchmarks/apple_arm_events.h | 28 ++++----- benchmarks/benchmark.cpp | 98 ++++++++++++++---------------- benchmarks/event_counter.h | 107 ++++++++++++++++++--------------- benchmarks/linux-perf-events.h | 19 +++--- 4 files changed, 125 insertions(+), 127 deletions(-) diff --git a/benchmarks/apple_arm_events.h b/benchmarks/apple_arm_events.h index 3a94081..cd63176 100644 --- a/benchmarks/apple_arm_events.h +++ b/benchmarks/apple_arm_events.h @@ -101,8 +101,6 @@ inline performance_counters operator-(const performance_counters &a, a.instructions - b.instructions); } - - typedef float f32; typedef double f64; typedef int8_t i8; @@ -616,9 +614,7 @@ typedef struct { #define lib_nelems(x) (sizeof(x) / sizeof((x)[0])) #define lib_symbol_def(name) \ - { \ -#name, (void **)&name \ - } + { #name, (void **)&name } static const lib_symbol lib_symbols_kperf[] = { lib_symbol_def(kpc_pmu_version), @@ -933,7 +929,7 @@ typedef struct { static const event_alias profile_events[] = { {"cycles", { - "FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE + "FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th "CPU_CLK_UNHALTED.CORE", // Intel Yonah, Merom }}, @@ -976,7 +972,6 @@ u64 counters_0[KPC_MAX_COUNTERS] = {0}; u64 counters_1[KPC_MAX_COUNTERS] = {0}; const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]); - bool setup_performance_counters() { static bool init = false; static bool worked = false; @@ -995,7 +990,7 @@ bool setup_performance_counters() { // check permission int force_ctrs = 0; if (kpc_force_all_ctrs_get(&force_ctrs)) { - //printf("Permission denied, xnu/kpc requires root privileges.\n"); + // printf("Permission denied, xnu/kpc requires root privileges.\n"); return (worked = false); } int ret; @@ -1101,17 +1096,16 @@ inline performance_counters get_counters() { } return 1; } - /*printf("counters value:\n"); - for (usize i = 0; i < ev_count; i++) { - const event_alias *alias = profile_events + i; - usize idx = counter_map[i]; - u64 val = counters_1[idx] - counters_0[idx]; - printf("%14s: %llu\n", alias->alias, val); - }*/ + /*printf("counters value:\n"); + for (usize i = 0; i < ev_count; i++) { + const event_alias *alias = profile_events + i; + usize idx = counter_map[i]; + u64 val = counters_1[idx] - counters_0[idx]; + printf("%14s: %llu\n", alias->alias, val); + }*/ return performance_counters{ counters_0[counter_map[0]], counters_0[counter_map[2]], - counters_0[counter_map[3]], - counters_0[counter_map[1]]}; + counters_0[counter_map[3]], counters_0[counter_map[1]]}; } #endif diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index c6b091f..993411e 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -1,4 +1,4 @@ -#if defined(__linux__) || (__APPLE__ && __aarch64__) +#if defined(__linux__) || (__APPLE__ && __aarch64__) #define USING_COUNTERS #include "event_counter.h" #endif @@ -22,7 +22,6 @@ #include #include - template double findmax_fastfloat64(std::vector> &s) { double answer = 0; @@ -55,8 +54,9 @@ event_collector collector{}; #ifdef USING_COUNTERS template -std::vector time_it_ns(std::vector> &lines, - T const &function, size_t repeat) { +std::vector +time_it_ns(std::vector> &lines, T const &function, + size_t repeat) { std::vector aggregate; bool printed_bug = false; for (size_t i = 0; i < repeat; i++) { @@ -71,7 +71,8 @@ std::vector time_it_ns(std::vector> &lines return aggregate; } -void pretty_print(double volume, size_t number_of_floats, std::string name, std::vector events) { +void pretty_print(double volume, size_t number_of_floats, std::string name, + std::vector events) { double volumeMB = volume / (1024. * 1024.); double average_ns{0}; double min_ns{DBL_MAX}; @@ -83,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, std: double branches_avg{0}; double branch_misses_min{0}; double branch_misses_avg{0}; - for(event_count e : events) { + for (event_count e : events) { double ns = e.elapsed_ns(); average_ns += ns; min_ns = min_ns < ns ? min_ns : ns; @@ -94,7 +95,8 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, std: double instructions = e.instructions(); instructions_avg += instructions; - instructions_min = instructions_min < instructions ? instructions_min : instructions; + instructions_min = + instructions_min < instructions ? instructions_min : instructions; double branches = e.branches(); branches_avg += branches; @@ -102,43 +104,37 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, std: double branch_misses = e.missed_branches(); branch_misses_avg += branch_misses; - branch_misses_min = branch_misses_min < branch_misses ? branch_misses_min : branch_misses; + branch_misses_min = + branch_misses_min < branch_misses ? branch_misses_min : branch_misses; } cycles_avg /= events.size(); instructions_avg /= events.size(); average_ns /= events.size(); branches_avg /= events.size(); printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(), - volumeMB * 1000000000 / min_ns, - (average_ns - min_ns) * 100.0 / average_ns); - printf("%8.2f Mfloat/s ", - number_of_floats * 1000 / min_ns); - if(instructions_min > 0) { - printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", - instructions_min / volume, - instructions_min / number_of_floats, + volumeMB * 1000000000 / min_ns, + (average_ns - min_ns) * 100.0 / average_ns); + printf("%8.2f Mfloat/s ", number_of_floats * 1000 / min_ns); + if (instructions_min > 0) { + printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", instructions_min / volume, + instructions_min / number_of_floats, (instructions_avg - instructions_min) * 100.0 / instructions_avg); - printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", - cycles_min / volume, - cycles_min / number_of_floats, + printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", cycles_min / volume, + cycles_min / number_of_floats, (cycles_avg - cycles_min) * 100.0 / cycles_avg); - printf(" %8.2f i/c ", - instructions_min /cycles_min); - printf(" %8.2f b/f ", - branches_avg /number_of_floats); - printf(" %8.2f bm/f ", - branch_misses_avg /number_of_floats); - printf(" %8.2f GHz ", - cycles_min / min_ns); + printf(" %8.2f i/c ", instructions_min / cycles_min); + printf(" %8.2f b/f ", branches_avg / number_of_floats); + printf(" %8.2f bm/f ", branch_misses_avg / number_of_floats); + printf(" %8.2f GHz ", cycles_min / min_ns); } printf("\n"); - } #else template -std::pair time_it_ns(std::vector> &lines, - T const &function, size_t repeat) { +std::pair +time_it_ns(std::vector> &lines, T const &function, + size_t repeat) { std::chrono::high_resolution_clock::time_point t1, t2; double average = 0; double min_value = DBL_MAX; @@ -160,21 +156,16 @@ std::pair time_it_ns(std::vector> &line return std::make_pair(min_value, average); } - - - -void pretty_print(double volume, size_t number_of_floats, std::string name, std::pair result) { +void pretty_print(double volume, size_t number_of_floats, std::string name, + std::pair result) { double volumeMB = volume / (1024. * 1024.); printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(), - volumeMB * 1000000000 / result.first, - (result.second - result.first) * 100.0 / result.second); - printf("%8.2f Mfloat/s ", - number_of_floats * 1000 / result.first); - printf(" %8.2f ns/f \n", - double(result.first) /number_of_floats ); + volumeMB * 1000000000 / result.first, + (result.second - result.first) * 100.0 / result.second); + printf("%8.2f Mfloat/s ", number_of_floats * 1000 / result.first); + printf(" %8.2f ns/f \n", double(result.first) / number_of_floats); } -#endif - +#endif // this is okay, all chars are ASCII inline std::u16string widen(std::string line) { @@ -195,21 +186,23 @@ std::vector widen(const std::vector &lines) { return u16lines; } - void process(std::vector &lines, size_t volume) { size_t repeat = 100; double volumeMB = volume / (1024. * 1024.); std::cout << "ASCII volume = " << volumeMB << " MB " << std::endl; - pretty_print(volume, lines.size(), "fastfloat (64)", time_it_ns(lines, findmax_fastfloat64, repeat)); - pretty_print(volume, lines.size(), "fastfloat (32)", time_it_ns(lines, findmax_fastfloat32, repeat)); + pretty_print(volume, lines.size(), "fastfloat (64)", + time_it_ns(lines, findmax_fastfloat64, repeat)); + pretty_print(volume, lines.size(), "fastfloat (32)", + time_it_ns(lines, findmax_fastfloat32, repeat)); std::vector lines16 = widen(lines); volume = 2 * volume; volumeMB = volume / (1024. * 1024.); std::cout << "UTF-16 volume = " << volumeMB << " MB " << std::endl; - pretty_print(volume, lines.size(), "fastfloat (64)", time_it_ns(lines16, findmax_fastfloat64, repeat)); - pretty_print(volume, lines.size(), "fastfloat (32)", time_it_ns(lines16, findmax_fastfloat32, repeat)); - + pretty_print(volume, lines.size(), "fastfloat (64)", + time_it_ns(lines16, findmax_fastfloat64, repeat)); + pretty_print(volume, lines.size(), "fastfloat (32)", + time_it_ns(lines16, findmax_fastfloat32, repeat)); } void fileload(std::string filename) { @@ -233,13 +226,14 @@ void fileload(std::string filename) { process(lines, volume); } - int main(int argc, char **argv) { - if(collector.has_events()) { + if (collector.has_events()) { std::cout << "# Using hardware counters" << std::endl; } else { -#if defined(__linux__) || (__APPLE__ && __aarch64__) - std::cout << "# Hardware counters not available, try to run in privileged mode (e.g., sudo)." << std::endl; +#if defined(__linux__) || (__APPLE__ && __aarch64__) + std::cout << "# Hardware counters not available, try to run in privileged " + "mode (e.g., sudo)." + << std::endl; #endif } fileload(std::string(BENCHMARK_DATA_DIR) + "/canada.txt"); diff --git a/benchmarks/event_counter.h b/benchmarks/event_counter.h index fb6db3a..3b7bb69 100644 --- a/benchmarks/event_counter.h +++ b/benchmarks/event_counter.h @@ -17,16 +17,19 @@ #include #endif -#if __APPLE__ && __aarch64__ +#if __APPLE__ && __aarch64__ #include "apple_arm_events.h" #endif struct event_count { std::chrono::duration elapsed; std::vector event_counts; - event_count() : elapsed(0), event_counts{0,0,0,0,0} {} - event_count(const std::chrono::duration _elapsed, const std::vector _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {} - event_count(const event_count& other): elapsed(other.elapsed), event_counts(other.event_counts) { } + event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} + event_count(const std::chrono::duration _elapsed, + const std::vector _event_counts) + : elapsed(_elapsed), event_counts(_event_counts) {} + event_count(const event_count &other) + : elapsed(other.elapsed), event_counts(other.event_counts) {} // The types of counters (so we can read the getter more easily) enum event_counter_types { @@ -36,31 +39,42 @@ struct event_count { MISSED_BRANCHES = 3 }; - double elapsed_sec() const { return std::chrono::duration(elapsed).count(); } - double elapsed_ns() const { return std::chrono::duration(elapsed).count(); } - double cycles() const { return static_cast(event_counts[CPU_CYCLES]); } - double instructions() const { return static_cast(event_counts[INSTRUCTIONS]); } - double branches() const { return static_cast(event_counts[BRANCHES]); } - double missed_branches() const { return static_cast(event_counts[MISSED_BRANCHES]); } + double elapsed_sec() const { + return std::chrono::duration(elapsed).count(); + } + double elapsed_ns() const { + return std::chrono::duration(elapsed).count(); + } + double cycles() const { + return static_cast(event_counts[CPU_CYCLES]); + } + double instructions() const { + return static_cast(event_counts[INSTRUCTIONS]); + } + double branches() const { + return static_cast(event_counts[BRANCHES]); + } + double missed_branches() const { + return static_cast(event_counts[MISSED_BRANCHES]); + } - event_count& operator=(const event_count& other) { + event_count &operator=(const event_count &other) { this->elapsed = other.elapsed; this->event_counts = other.event_counts; return *this; } - event_count operator+(const event_count& other) const { - return event_count(elapsed+other.elapsed, { - event_counts[0]+other.event_counts[0], - event_counts[1]+other.event_counts[1], - event_counts[2]+other.event_counts[2], - event_counts[3]+other.event_counts[3], - event_counts[4]+other.event_counts[4], - }); + event_count operator+(const event_count &other) const { + return event_count(elapsed + other.elapsed, + { + event_counts[0] + other.event_counts[0], + event_counts[1] + other.event_counts[1], + event_counts[2] + other.event_counts[2], + event_counts[3] + other.event_counts[3], + event_counts[4] + other.event_counts[4], + }); } - void operator+=(const event_count& other) { - *this = *this + other; - } + void operator+=(const event_count &other) { *this = *this + other; } }; struct event_aggregate { @@ -72,7 +86,7 @@ struct event_aggregate { event_aggregate() = default; - void operator<<(const event_count& other) { + void operator<<(const event_count &other) { if (iterations == 0 || other.elapsed < best.elapsed) { best = other; } @@ -88,53 +102,48 @@ struct event_aggregate { double cycles() const { return total.cycles() / iterations; } double instructions() const { return total.instructions() / iterations; } double branches() const { return total.branches() / iterations; } - double missed_branches() const { return total.missed_branches() / iterations; } + double missed_branches() const { + return total.missed_branches() / iterations; + } }; struct event_collector { event_count count{}; std::chrono::time_point start_clock{}; -#if defined(__linux__) +#if defined(__linux__) LinuxEvents linux_events; - event_collector() : linux_events(std::vector{ - PERF_COUNT_HW_CPU_CYCLES, - PERF_COUNT_HW_INSTRUCTIONS, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions - PERF_COUNT_HW_BRANCH_MISSES - }) {} - bool has_events() { - return linux_events.is_working(); - } -#elif __APPLE__ && __aarch64__ + event_collector() + : linux_events(std::vector{ + PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions + PERF_COUNT_HW_BRANCH_MISSES}) {} + bool has_events() { return linux_events.is_working(); } +#elif __APPLE__ && __aarch64__ performance_counters diff; - event_collector() : diff(0) { - setup_performance_counters(); - } - bool has_events() { - return setup_performance_counters(); - } + event_collector() : diff(0) { setup_performance_counters(); } + bool has_events() { return setup_performance_counters(); } #else event_collector() {} - bool has_events() { - return false; - } + bool has_events() { return false; } #endif inline void start() { #if defined(__linux) linux_events.start(); -#elif __APPLE__ && __aarch64__ - if(has_events()) { diff = get_counters(); } +#elif __APPLE__ && __aarch64__ + if (has_events()) { + diff = get_counters(); + } #endif start_clock = std::chrono::steady_clock::now(); } - inline event_count& end() { + inline event_count &end() { const auto end_clock = std::chrono::steady_clock::now(); #if defined(__linux) linux_events.end(count.event_counts); -#elif __APPLE__ && __aarch64__ - if(has_events()) { +#elif __APPLE__ && __aarch64__ + if (has_events()) { performance_counters end = get_counters(); diff = end - diff; } diff --git a/benchmarks/linux-perf-events.h b/benchmarks/linux-perf-events.h index 73cfbaf..0a9e553 100644 --- a/benchmarks/linux-perf-events.h +++ b/benchmarks/linux-perf-events.h @@ -42,7 +42,8 @@ template class LinuxEvents { uint32_t i = 0; for (auto config : config_vec) { attribs.config = config; - int _fd = static_cast(syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); + int _fd = static_cast( + syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); if (_fd == -1) { report_error("perf_event_open"); } @@ -56,7 +57,11 @@ template class LinuxEvents { temp_result_vec.resize(num_events * 2 + 1); } - ~LinuxEvents() { if (fd != -1) { close(fd); } } + ~LinuxEvents() { + if (fd != -1) { + close(fd); + } + } inline void start() { if (fd != -1) { @@ -85,19 +90,15 @@ template class LinuxEvents { results[i / 2] = temp_result_vec[i]; } for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) { - if(ids[i/2-1] != temp_result_vec[i]) { + if (ids[i / 2 - 1] != temp_result_vec[i]) { report_error("event mismatch"); } } } - bool is_working() { - return working; - } + bool is_working() { return working; } private: - void report_error(const std::string &) { - working = false; - } + void report_error(const std::string &) { working = false; } }; #endif \ No newline at end of file