diff --git a/bench/models/benchmark.cc b/bench/models/benchmark.cc
index c423a21ee690..ad4c2869457c 100644
--- a/bench/models/benchmark.cc
+++ b/bench/models/benchmark.cc
@@ -5,14 +5,12 @@
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
@@ -23,8 +21,9 @@
 #include "xnnpack/subgraph.h"
 #include "pthreadpool.h"
-int FLAGS_num_threads = 1;
-uint32_t FLAGS_xnn_runtime_flags = 0;
+namespace {
+constexpr size_t kMinIterations = 10;  // Invocations per KeepRunningBatch().
+}  // namespace
 struct ModelRuntime {
   std::unique_ptr model;
@@ -116,11 +115,15 @@ static void BenchmarkInvoke(benchmark::State& state,
     return;
   }
-  for (auto _ : state) {
-    benchmark::utils::WipePthreadpoolL2Caches(state, model_runtime.threadpool);
-    if (!model_runtime.Invoke()) {
-      state.SkipWithError("failed to invoke runtime");
-      return;
+  while (state.KeepRunningBatch(kMinIterations)) {
+    for (size_t iter = 0; iter < kMinIterations; iter++) {
+      benchmark::utils::WipePthreadpoolL2Caches(state,
+                                                model_runtime.threadpool);
+
+      if (!model_runtime.Invoke()) {
+        state.SkipWithError("failed to invoke runtime");
+        return;
+      }
     }
   }
@@ -239,46 +242,4 @@ BENCHMARK(QD8Attention)
 BENCHMARK(QS8MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
-int ProcessArgs(int& argc, char**& argv) {
-  for (int i = 1; i < argc;) {
-    if (strncmp(argv[i], "--num_threads=", 14) == 0) {
-      FLAGS_num_threads = atoi(argv[i] + 14);
-      if (FLAGS_num_threads <= 0) {
-        std::cerr << "Invalid --num_threads: " << FLAGS_num_threads << "\n";
-        return 1;
-      }
-      std::copy(argv + i + 1, argv + argc, argv + i);
-      argc -= 1;
-    } else if (strncmp(argv[i], "--xnn_runtime_flags=", 20) == 0) {
-      const char* v = argv[i] + 20;
-      if (strlen(v) > 2 && strncmp(v, "0x", 2) == 0) {
-        FLAGS_xnn_runtime_flags = strtoul(v + 2, nullptr, 16);
-      } else {
-        FLAGS_xnn_runtime_flags = strtoul(v, nullptr, 10);
-      }
-      std::copy(argv + i + 1, argv + argc, argv + i);
-      argc -= 1;
-    } else {
-      ++i;
-    }
-  }
-  return 0;
-}
-
-#ifdef BENCHMARK_ARGS_BOTTLENECK
-// We are provided with a main that will call this function
-extern "C" {
-int BenchmarkArgBottleneck(int& argc, char**& argv) {
-  return ProcessArgs(argc, argv);
-}
-}
-#else
-int main(int argc, char** argv) {
-  ::benchmark::Initialize(&argc, argv);
-  int status = ProcessArgs(argc, argv);
-  if (status != 0) return status;
-  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
-  ::benchmark::RunSpecifiedBenchmarks();
-}
-#endif
-
+XNN_BENCHMARK_MAIN();
diff --git a/bench/utils.cc b/bench/utils.cc
index 61e4f862c9d7..0018cc0e0db3 100644
--- a/bench/utils.cc
+++ b/bench/utils.cc
@@ -3,33 +3,38 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
+#include "utils.h"
+
+#include
 #include
 #include
 #include
 #include
+#include
 #include
-#include "xnnpack/common.h"
-#include
-#include "pthreadpool.h"
-
 #ifdef __linux__
-  #include
+#include
 #endif
 #if defined(__ANDROID__) || defined(_WIN32) || defined(__CYGWIN__)
-  #include
+#include
 #endif
 #if defined(__SSE__) || defined(__x86_64__)
-  #include
+#include
 #endif
 #if XNN_ENABLE_CPUINFO
-  #include
+#include
 #endif  // XNN_ENABLE_CPUINFO
+#include "xnnpack/common.h"
 #include "xnnpack/hardware-config.h"
+#include
+#include "pthreadpool.h"
-#include "utils.h"
+// Common flags for all benchmarks.
+int FLAGS_num_threads = 1;
+uint32_t FLAGS_xnn_runtime_flags = 0;
 namespace benchmark {
 namespace utils {
@@ -88,6 +93,44 @@ void PthreadpoolClearL2Cache(void* context, size_t id) {
 };  // namespace
+// Parses the flags shared by all benchmarks and removes them from argv:
+//   --num_threads=N        sets FLAGS_num_threads; N must parse to > 0.
+//   --xnn_runtime_flags=F  sets FLAGS_xnn_runtime_flags; F is read as hex
+//                          when prefixed with "0x", as decimal otherwise.
+// All other arguments are left in place. Returns 1 (after printing to
+// std::cerr) if --num_threads is not positive, and 0 otherwise.
+int ProcessArgs(int& argc, char**& argv) {
+  for (int i = 1; i < argc;) {
+    if (strncmp(argv[i], "--num_threads=", 14) == 0) {
+      FLAGS_num_threads = atoi(argv[i] + 14);
+      if (FLAGS_num_threads <= 0) {
+        std::cerr << "Invalid --num_threads: " << FLAGS_num_threads << "\n";
+        return 1;
+      }
+      // Shift the remaining arguments down over the consumed flag and clear
+      // the vacated slot so argv[argc] is null, as it is for main()'s argv.
+      std::copy(argv + i + 1, argv + argc, argv + i);
+      argc -= 1;
+      argv[argc] = nullptr;
+    } else if (strncmp(argv[i], "--xnn_runtime_flags=", 20) == 0) {
+      const char* v = argv[i] + 20;
+      if (strlen(v) > 2 && strncmp(v, "0x", 2) == 0) {
+        FLAGS_xnn_runtime_flags = strtoul(v + 2, nullptr, 16);
+      } else {
+        FLAGS_xnn_runtime_flags = strtoul(v, nullptr, 10);
+      }
+      // Remove the consumed flag; `i` then indexes the next argument.
+      std::copy(argv + i + 1, argv + argc, argv + i);
+      argc -= 1;
+      argv[argc] = nullptr;
+    } else {
+      ++i;
+    }
+  }
+  // InitGoogle(...);
+  return 0;
+}
+
 uint32_t PrefetchToL1(const void* ptr, size_t size) {
   uint32_t step = 16;
 #if XNN_ENABLE_CPUINFO
@@ -154,7 +197,7 @@ void DisableDenormals() {
 #endif
 }
-// Return clockrate in Hz
+// Return clock rate in Hz.
uint64_t GetCurrentCpuFrequency() {
 #ifdef __linux__
   int freq = 0;
diff --git a/bench/utils.h b/bench/utils.h
index 168aeecd8282..354208c145c0 100644
--- a/bench/utils.h
+++ b/bench/utils.h
@@ -13,9 +13,43 @@
 #include
 #include "pthreadpool.h"
+// XNN_BENCHMARK_MAIN() defines a benchmark binary's entry point; use it once
+// at file scope as `XNN_BENCHMARK_MAIN();`. When BENCHMARK_ARGS_BOTTLENECK is
+// defined it emits BenchmarkArgBottleneck(), which forwards to ProcessArgs().
+// Otherwise it emits a main() that initializes the benchmark library, runs
+// ProcessArgs(), rejects unrecognized arguments and runs the benchmarks.
+// Both expansions end in a declaration so that the trailing `;` is required.
+#ifdef BENCHMARK_ARGS_BOTTLENECK
+#define XNN_BENCHMARK_MAIN()                                  \
+  extern "C" {                                                \
+  int BenchmarkArgBottleneck(int& argc, char**& argv) {       \
+    return benchmark::utils::ProcessArgs(argc, argv);         \
+  }                                                           \
+  }                                                           \
+  extern "C" int BenchmarkArgBottleneck(int& argc, char**& argv)
+#else
+#define XNN_BENCHMARK_MAIN()                                            \
+  int main(int argc, char** argv) {                                     \
+    ::benchmark::Initialize(&argc, argv);                               \
+    int status = benchmark::utils::ProcessArgs(argc, argv);             \
+    if (status != 0) return status;                                     \
+    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
+    ::benchmark::RunSpecifiedBenchmarks();                              \
+  }                                                                     \
+  int main(int, char**)
+#endif  // BENCHMARK_ARGS_BOTTLENECK
+
+// Common flags for all benchmarks.
+extern int FLAGS_num_threads;
+extern uint32_t FLAGS_xnn_runtime_flags;
+
 namespace benchmark {
 namespace utils {
+// Parses --num_threads= and --xnn_runtime_flags= into the FLAGS_* variables
+// and removes them from argv; returns non-zero if --num_threads is invalid.
+int ProcessArgs(int& argc, char**& argv);
+
 uint32_t WipeCache();
 uint32_t PrefetchToL1(const void* ptr, size_t size);