Skip to content

Commit

Permalink
Add Halide Conv Layer Benchmark.
Browse files Browse the repository at this point in the history
  • Loading branch information
taiqzheng committed Mar 14, 2023
1 parent 3a0b816 commit e9a29bf
Show file tree
Hide file tree
Showing 9 changed files with 502 additions and 0 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ if(DEFINED IMAGE_PROCESSING_BENCHMARKS OR DEEP_LEARNING_BENCHMARKS OR OP_OPTIMIZ
include_directories(${OpenCV_INCLUDE_DIRS})
endif()

#-------------------------------------------------------------------------------
# Find Halide
#-------------------------------------------------------------------------------

# Halide is located (and required) whenever DEEP_LEARNING_BENCHMARKS is defined.
# NOTE(review): `DEFINED` is also true for -DDEEP_LEARNING_BENCHMARKS=OFF, so
# Halide becomes mandatory even when the option is explicitly disabled —
# confirm this matches the intent (the surrounding checks use the same pattern).
if(DEFINED DEEP_LEARNING_BENCHMARKS)
find_package(Halide REQUIRED)
endif()

#-------------------------------------------------------------------------------
# Find PNG
#-------------------------------------------------------------------------------
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ $ cd bin && ./image-processing-benchmark <image path> <kernel name> <kernelmorph

## Deep Learning Benchmark

Currently, the deep learning benchmark includes the following frameworks or optimizers:

- Halide ([link](https://github.com/halide/Halide/blob/main/README_cmake.md))

*NOTE: Please build Halide 15.0.0 from source to achieve the best performance.*

| CMake Options | Default Value |
| -------------- | ------------- |
| `-DBUDDY_OPT_ATTR` | avx512f |
Expand All @@ -78,6 +84,7 @@ $ mkdir build && cd build
$ cmake -G Ninja .. \
-DDEEP_LEARNING_BENCHMARKS=ON \
-DOpenCV_DIR=/PATH/TO/OPENCV/BUILD/ \
-DCMAKE_PREFIX_PATH=/PATH/TO/Halide-install/ \
-DBUDDY_MLIR_BUILD_DIR=/PATH/TO/BUDDY-MLIR/BUILD/
$ ninja
```
Expand Down
1 change: 1 addition & 0 deletions benchmarks/DeepLearning/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
add_subdirectory(Layers)
add_subdirectory(Models)
add_subdirectory(Ops)

45 changes: 45 additions & 0 deletions benchmarks/DeepLearning/Layers/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#-------------------------------------------------------------------------------
# Generate Non-Schedule Version Conv Layer Static Library
#-------------------------------------------------------------------------------

# Build the generator executable from conv_layer_generator.cpp, link it against
# Halide::Generator, and use it to produce the conv_layer_nonschedule library.
add_executable(conv_layer_generator conv_layer_generator.cpp)
target_link_libraries(conv_layer_generator
PRIVATE
Halide::Generator)
add_halide_library(conv_layer_nonschedule FROM conv_layer_generator)

#-------------------------------------------------------------------------------
# Generate Auto-Schedule Version Conv Layer Static Library
#-------------------------------------------------------------------------------

# Build the generator executable from conv_layer_generator-autoschedule.cpp and
# use it to produce the conv_layer_autoschedule library. The schedule is chosen
# by the Halide::Mullapudi2016 autoscheduler plugin.
add_executable(conv_layer_autoschedule_generator conv_layer_generator-autoschedule.cpp)
target_link_libraries(conv_layer_autoschedule_generator
PRIVATE
Halide::Generator)
add_halide_library(conv_layer_autoschedule FROM conv_layer_autoschedule_generator
AUTOSCHEDULER Halide::Mullapudi2016)

#-------------------------------------------------------------------------------
# Generate Manually-Schedule Version Conv Layer Static Library
#-------------------------------------------------------------------------------

# Build the generator executable from conv_layer_generator-manually.cpp and use
# it to produce the conv_layer_manuallyschedule library.
add_executable(conv_layer_manually_generator conv_layer_generator-manually.cpp)
target_link_libraries(conv_layer_manually_generator
PRIVATE
Halide::Generator)
add_halide_library(conv_layer_manuallyschedule FROM conv_layer_manually_generator)

#-------------------------------------------------------------------------------
# Halide ConvLayer Benchmark Target
#-------------------------------------------------------------------------------

# Benchmark driver: Main.cpp provides main(), HalideConvLayerBenchmark.cpp
# provides the benchmark bodies.
add_executable(halide-convlayer-benchmark
Main.cpp
HalideConvLayerBenchmark.cpp)

# Link the driver against Google Benchmark and the three generated conv layer
# libraries (non-scheduled, manually scheduled, auto-scheduled).
# NOTE(review): Halide::ImageIO is linked although the benchmark sources shown
# here perform no image I/O — confirm whether it is still needed.
target_link_libraries(halide-convlayer-benchmark
GoogleBenchmark
Halide::ImageIO
conv_layer_nonschedule
conv_layer_manuallyschedule
conv_layer_autoschedule)
87 changes: 87 additions & 0 deletions benchmarks/DeepLearning/Layers/HalideConvLayerBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include <chrono>
#include <cstdio>

#include "conv_layer_nonschedule.h"
#include "conv_layer_manuallyschedule.h"
#include "conv_layer_autoschedule.h"
#include <benchmark/benchmark.h>
#include "HalideBuffer.h"

using namespace Halide::Runtime;

// Extents used to size every buffer below.
const int N = 5;
const int CI = 128;
const int CO = 128;
const int W = 100;
const int H = 80;

// Each benchmarked pipeline gets its own private set of buffers:
//   (input,  filter,  bias,  output)  -> conv_layer_nonschedule
//   (input1, filter1, bias1, output1) -> conv_layer_manuallyschedule
//   (input2, filter2, bias2, output2) -> conv_layer_autoschedule

// Inputs: extents (CI, W + 2, H + 2, N); the two spatial extents are 2 larger
// than the corresponding output extents.
Buffer<float, 4> input(CI, W + 2, H + 2, N);
Buffer<float, 4> input1(CI, W + 2, H + 2, N);
Buffer<float, 4> input2(CI, W + 2, H + 2, N);

// Filters: extents (CO, 3, 3, CI).
Buffer<float, 4> filter(CO, 3, 3, CI);
Buffer<float, 4> filter1(CO, 3, 3, CI);
Buffer<float, 4> filter2(CO, 3, 3, CI);

// Biases: one value per CO entry.
Buffer<float, 1> bias(CO);
Buffer<float, 1> bias1(CO);
Buffer<float, 1> bias2(CO);

// Outputs: extents (CO, W, H, N).
Buffer<float, 4> output(CO, W, H, N);
Buffer<float, 4> output1(CO, W, H, N);
Buffer<float, 4> output2(CO, W, H, N);

// Populates the global input, filter and bias buffers with rand() values.
//
// Every element is drawn once and written to all three copies of the buffer,
// so the three benchmarked pipelines operate on identical data. The `argv`
// parameter is accepted for the caller's convenience but is not read.
//
// Also sets the HL_CUDA_JIT_MAX_REGISTERS environment variable to "256".
void initializeHalideConvLayerBenchmark(char **argv) {
  // Inputs: walk dimensions 3 -> 0, outermost to innermost.
  for (int d3 = 0; d3 < input.dim(3).extent(); d3++) {
    for (int d2 = 0; d2 < input.channels(); d2++) {
      for (int d1 = 0; d1 < input.height(); d1++) {
        for (int d0 = 0; d0 < input.width(); d0++) {
          const float sample = static_cast<float>(rand());
          input(d0, d1, d2, d3) = sample;
          input1(d0, d1, d2, d3) = sample;
          input2(d0, d1, d2, d3) = sample;
        }
      }
    }
  }

  // Filters: same traversal order as the inputs.
  for (int d3 = 0; d3 < filter.dim(3).extent(); d3++) {
    for (int d2 = 0; d2 < filter.channels(); d2++) {
      for (int d1 = 0; d1 < filter.height(); d1++) {
        for (int d0 = 0; d0 < filter.width(); d0++) {
          const float sample = static_cast<float>(rand());
          filter(d0, d1, d2, d3) = sample;
          filter1(d0, d1, d2, d3) = sample;
          filter2(d0, d1, d2, d3) = sample;
        }
      }
    }
  }

  // Biases: a single dimension.
  for (int d0 = 0; d0 < bias.width(); d0++) {
    const float sample = static_cast<float>(rand());
    bias(d0) = sample;
    bias1(d0) = sample;
    bias2(d0) = sample;
  }

  // Export the register limit through the environment; on POSIX the final
  // argument (1) overwrites any value that is already set.
  // NOTE(review): presumably only relevant for Halide's CUDA JIT backend —
  // confirm it has any effect on these ahead-of-time compiled pipelines.
  const char *const envName = "HL_CUDA_JIT_MAX_REGISTERS";
  const char *const envValue = "256";
#ifdef _WIN32
  _putenv_s(envName, envValue);
#else
  setenv(envName, envValue, 1);
#endif
}

// Benchmarks the non-scheduled conv layer pipeline.
//
// Each timed iteration invokes conv_layer_nonschedule state.range(0) times on
// the (input, filter, bias, output) buffer set. The pipeline's status code is
// checked: a non-zero result marks the benchmark as failed instead of letting
// a broken run be reported as a valid timing.
static void Halide_ConvLayer_NonSchedule(benchmark::State &state) {
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      if (conv_layer_nonschedule(input, filter, bias, output) != 0) {
        state.SkipWithError("conv_layer_nonschedule returned an error");
        return; // Leave immediately; further iterations would be meaningless.
      }
    }
  }
}

// Benchmarks the manually scheduled conv layer pipeline.
//
// Each timed iteration invokes conv_layer_manuallyschedule state.range(0)
// times on the (input1, filter1, bias1, output1) buffer set. The pipeline's
// status code is checked: a non-zero result marks the benchmark as failed
// instead of letting a broken run be reported as a valid timing.
//
// NOTE: the identifier keeps its historical spelling ("Maunally") because the
// registration function refers to it by this exact name.
static void Halide_ConvLayer_MaunallySchedule(benchmark::State &state) {
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      if (conv_layer_manuallyschedule(input1, filter1, bias1, output1) != 0) {
        state.SkipWithError("conv_layer_manuallyschedule returned an error");
        return; // Leave immediately; further iterations would be meaningless.
      }
    }
  }
}

// Benchmarks the auto-scheduled conv layer pipeline.
//
// Each timed iteration invokes conv_layer_autoschedule state.range(0) times on
// the (input2, filter2, bias2, output2) buffer set. The pipeline's status code
// is checked: a non-zero result marks the benchmark as failed instead of
// letting a broken run be reported as a valid timing.
static void Halide_ConvLayer_AutoSchedule(benchmark::State &state) {
  for (auto _ : state) {
    for (int i = 0; i < state.range(0); ++i) {
      if (conv_layer_autoschedule(input2, filter2, bias2, output2) != 0) {
        state.SkipWithError("conv_layer_autoschedule returned an error");
        return; // Leave immediately; further iterations would be meaningless.
      }
    }
  }
}

// Registers the three conv layer benchmarks with Google Benchmark.
// Every benchmark receives a single argument of 1 (read back through
// state.range(0)) and reports its timings in milliseconds.
void registerBenchmarkHalideConvLayer() {
  BENCHMARK(Halide_ConvLayer_NonSchedule)
      ->Arg(1)
      ->Unit(benchmark::kMillisecond);

  BENCHMARK(Halide_ConvLayer_MaunallySchedule)
      ->Arg(1)
      ->Unit(benchmark::kMillisecond);

  BENCHMARK(Halide_ConvLayer_AutoSchedule)
      ->Arg(1)
      ->Unit(benchmark::kMillisecond);
}

46 changes: 46 additions & 0 deletions benchmarks/DeepLearning/Layers/Main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===- Main.cpp -----------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This is the main file of the Halide Conv Layer benchmark.
//
//===----------------------------------------------------------------------===//

#include <benchmark/benchmark.h>
#include <stdexcept>

void initializeHalideConvLayerBenchmark(char **);

void registerBenchmarkHalideConvLayer();

// Entry point of the Halide conv layer benchmark.
//
// Command-line handling is delegated to Google Benchmark: its own flags
// (e.g. --benchmark_filter) are consumed by benchmark::Initialize, and any
// argument it does not recognise is reported and makes the program exit with
// status 1. Running with no arguments behaves exactly as before.
//
// Returns 0 after the benchmarks have run, 1 on unrecognised arguments.
int main(int argc, char **argv) {
  // Validate the command line first so that a typo is reported before the
  // benchmark buffers are filled.
  ::benchmark::Initialize(&argc, argv);
  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
    return 1;
  }

  // Fill the input buffers shared by all benchmark variants.
  initializeHalideConvLayerBenchmark(argv);

  // Register Benchmark Function.
  registerBenchmarkHalideConvLayer();

  ::benchmark::RunSpecifiedBenchmarks();

  return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "Halide.h"

namespace {

using namespace Halide;

// Halide generator for a 3x3 convolution layer with bias followed by ReLU.
//
// Inputs:
//   input  - 4-D float buffer, extents (CI, W + 2, H + 2, N)
//   filter - 4-D float buffer, extents (CO, 3, 3, CI)
//   bias   - 1-D float buffer, extent  (CO)
// Output:
//   relu   - 4-D float buffer, extents (CO, W, H, N)
//
// All extents are fixed at generation time through set_bounds, and every
// buffer is constrained to a dense layout in which dimension 0 has stride 1.
// No manual schedule is applied here; when an autoscheduler is in use, size
// estimates are supplied for every buffer dimension.
class ConvolutionLayer : public Halide::Generator<ConvolutionLayer> {
public:
    Input<Buffer<float, 4>> input{"input"};
    Input<Buffer<float, 4>> filter{"filter"};
    Input<Buffer<float, 1>> bias{"bias"};
    Output<Buffer<float, 4>> relu{"relu"};

    void generate() {
        const int N = 5, CI = 128, CO = 128, W = 100, H = 80;

        /* THE ALGORITHM */

        Var x("x"), y("y"), c("c"), n("n");

        Func conv("conv");
        // Reduction domain: r.x spans the CI input channels, r.y and r.z span
        // the 3x3 filter window.
        RDom r(0, CI, 0, 3, 0, 3);

        // Start from the bias, then accumulate filter * input over the
        // reduction domain.
        conv(c, x, y, n) = bias(c);
        conv(c, x, y, n) += filter(c, r.y, r.z, r.x) * input(r.x, x + r.y, y + r.z, n);

        // ReLU: clamp negative results to zero.
        relu(c, x, y, n) = max(0, conv(c, x, y, n));

        /* THE SCHEDULE */

        // Fix the extent and stride of every dimension of every buffer.
        relu.dim(0).set_bounds(0, CO).set_stride(1);
        relu.dim(1).set_bounds(0, W).set_stride(CO);
        relu.dim(2).set_bounds(0, H).set_stride(CO * W);
        relu.dim(3).set_bounds(0, N).set_stride(CO * H * W);

        input.dim(0).set_bounds(0, CI).set_stride(1);
        input.dim(1).set_bounds(0, W + 2).set_stride(CI);
        input.dim(2).set_bounds(0, H + 2).set_stride(CI * (W + 2));
        input.dim(3).set_bounds(0, N).set_stride(CI * (W + 2) * (H + 2));

        filter.dim(0).set_bounds(0, CO).set_stride(1);
        filter.dim(1).set_bounds(0, 3).set_stride(CO);
        filter.dim(2).set_bounds(0, 3).set_stride(CO * 3);
        filter.dim(3).set_bounds(0, CI).set_stride(CO * 3 * 3);

        bias.dim(0).set_bounds(0, CO).set_stride(1);

        if (using_autoscheduler()) {
            input.dim(0).set_estimate(0, CI);
            input.dim(1).set_estimate(0, W + 2);
            input.dim(2).set_estimate(0, H + 2);
            input.dim(3).set_estimate(0, N);

            filter.dim(0).set_estimate(0, CO);
            filter.dim(1).set_estimate(0, 3);
            filter.dim(2).set_estimate(0, 3);
            filter.dim(3).set_estimate(0, CI);

            bias.dim(0).set_estimate(0, CO);

            // The output estimates must describe the same shape as the bounds
            // set above, i.e. (CO, W, H, N); otherwise the autoscheduler would
            // plan for a differently shaped output than the one produced.
            relu.dim(0).set_estimate(0, CO);
            relu.dim(1).set_estimate(0, W);
            relu.dim(2).set_estimate(0, H);
            relu.dim(3).set_estimate(0, N);
        }
    }
};

} // namespace

// Register ConvolutionLayer under the generator name "conv_layer_autoschedule".
HALIDE_REGISTER_GENERATOR(ConvolutionLayer, conv_layer_autoschedule)
Loading

0 comments on commit e9a29bf

Please sign in to comment.