Skip to content

Commit

Permalink
Merge branch 'buddy-compiler:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
zhxzh-2001 authored Oct 23, 2024
2 parents 8a0989d + 2b2a8df commit 41784eb
Show file tree
Hide file tree
Showing 26 changed files with 3,200 additions and 496 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ if(BUDDY_MLIR_ENABLE_DIP_LIB)
find_package(PNG REQUIRED)
endif()

# When BUDDY_ENABLE_PNG is set, add the BUDDY_ENABLE_PNG compile definition
# and require the PNG package (configuration fails if it cannot be found).
if(BUDDY_ENABLE_PNG)
add_definitions(-DBUDDY_ENABLE_PNG)
find_package(PNG REQUIRED)
endif()

# Generate libraries into `lib` of build directory.
set(LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)

Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ $ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
```

To configure the build environment for using image processing libraries, follow these steps:

```
$ cmake -G Ninja .. \
-DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
-DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_DIP_LIB=ON \
-DBUDDY_ENABLE_PNG=ON
$ ninja
$ ninja check-buddy
```

To build buddy-mlir with custom LLVM sources:

```
Expand Down
1 change: 0 additions & 1 deletion examples/BuddyMobileNetV3/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg0.data
${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg1.data
${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/forward.mlir
${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/subgraph0.mlir
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/buddy-mobilenetv3-import.py
Expand Down
3 changes: 2 additions & 1 deletion examples/BuddyMobileNetV3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ $ cmake -G Ninja .. \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
-DPython3_EXECUTABLE=$(which python3) \
-DBUDDY_MLIR_ENABLE_DIP_LIB=ON
-DBUDDY_MLIR_ENABLE_DIP_LIB=ON \
-DBUDDY_ENABLE_PNG=ON
$ ninja
$ ninja check-buddy
```
Expand Down
21 changes: 14 additions & 7 deletions examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,17 @@
"The environment variable 'MOBILENETV3_MODEL_PATH' is not set or is invalid."
)

model = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1, pretrained=True)
model = models.mobilenet_v3_small(
weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1, pretrained=True
)
model = model.eval()

# Strip the num_batches_tracked attribute from every BatchNorm2d layer.
for module in model.modules():
    is_batch_norm = isinstance(module, torch.nn.BatchNorm2d)
    if is_batch_norm and hasattr(module, "num_batches_tracked"):
        del module.num_batches_tracked

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
primary_registry=tosa.ops_registry,
Expand Down Expand Up @@ -68,11 +76,10 @@


float32_param = np.concatenate(
[param.detach().numpy().reshape([-1]) for param in params if param.dtype == torch.float32]
[
param.detach().numpy().reshape([-1])
for param in params
if param.dtype == torch.float32
]
)
float32_param.tofile(Path(current_path) / "arg0.data")

int64_param = np.concatenate(
[param.detach().numpy().reshape([-1]) for param in params if param.dtype == torch.int64]
)
int64_param.tofile(Path(current_path) / "arg1.data")
65 changes: 31 additions & 34 deletions examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,43 +33,43 @@ const std::string ImgName = "dog.png";
// Declare the mobilenet C interface.
extern "C" void _mlir_ciface_forward(MemRef<float, 2> *output,
MemRef<float, 1> *arg0,
MemRef<long long, 1> *arg1,
MemRef<float, 4> *input);

/// Write the "[Log] " prefix to stdout, wrapped in the ANSI escape sequences
/// for bold blue text. No newline is emitted and the stream is not flushed.
void printLogLabel() {
  static constexpr char kLogLabel[] = "\033[34;1m[Log] \033[0m";
  std::cout << kLogLabel;
}

void loadParameters(const std::string &floatParamPath,
const std::string &int64ParamPath,
MemRef<float, 1> &floatParam,
MemRef<long long, 1> &int64Param) {
std::ifstream floatParamFile(floatParamPath, std::ios::in | std::ios::binary);
if (!floatParamFile.is_open()) {
std::string errMsg = "Failed to open float param file: " +
std::filesystem::canonical(floatParamPath).string();
throw std::runtime_error(errMsg);
/// Load parameters into data container.
void loadParameters(const std::string &paramFilePath,
MemRef<float, 1> &params) {
const auto loadStart = std::chrono::high_resolution_clock::now();
// Open the parameter file in binary mode.
std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary);
if (!paramFile.is_open()) {
throw std::runtime_error("[Error] Failed to open params file!");
}
floatParamFile.read(reinterpret_cast<char *>(floatParam.getData()),
floatParam.getSize() * sizeof(float));
if (floatParamFile.fail()) {
throw std::runtime_error("Failed to read float param file");
printLogLabel();
std::cout << "Loading params..." << std::endl;
printLogLabel();
// Print the canonical path of the parameter file.
std::cout << "Params file: " << std::filesystem::canonical(paramFilePath)
<< std::endl;
// Read the parameter data into the provided memory reference.
paramFile.read(reinterpret_cast<char *>(params.getData()),
sizeof(float) * (params.getSize()));
if (paramFile.fail()) {
throw std::runtime_error("Error occurred while reading params file!");
}
floatParamFile.close();

std::ifstream int64ParamFile(int64ParamPath, std::ios::in | std::ios::binary);
if (!int64ParamFile.is_open()) {
std::string errMsg = "Failed to open int64 param file: " +
std::filesystem::canonical(int64ParamPath).string();
throw std::runtime_error(errMsg);
}
int64ParamFile.read(reinterpret_cast<char *>(int64Param.getData()),
int64Param.getSize() * sizeof(long long));
if (int64ParamFile.fail()) {
throw std::runtime_error("Failed to read int64 param file");
}
int64ParamFile.close();
paramFile.close();
const auto loadEnd = std::chrono::high_resolution_clock::now();
const std::chrono::duration<double, std::milli> loadTime =
loadEnd - loadStart;
printLogLabel();
std::cout << "Params load time: " << (double)(loadTime.count()) / 1000
<< "s\n"
<< std::endl;
}


// Softmax function.
void softmax(float *input, size_t size) {
size_t i;
Expand Down Expand Up @@ -124,13 +124,10 @@ int main() {

// Load model parameters from the specified file.
std::string paramsDir = mobilenetDir + "/arg0.data";
std::string intDir = mobilenetDir + "/arg1.data";
MemRef<float, 1> paramsContainerf32({ParamsSize});
MemRef<long long, 1> ParamsContainerInt64({34});
loadParameters(paramsDir, intDir, paramsContainerf32, ParamsContainerInt64);
MemRef<float, 1> paramsContainer({ParamsSize});
loadParameters(paramsDir, paramsContainer);
// Call the forward function of the model.
_mlir_ciface_forward(&output, &paramsContainerf32, &ParamsContainerInt64,
&inputResize);
_mlir_ciface_forward(&output, &paramsContainer, &inputResize);

auto out = output.getData();
softmax(out, 1000);
Expand Down
7 changes: 7 additions & 0 deletions examples/DAPDialect/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,10 @@ target_link_libraries(buddy-whisper-preprocess
BuddyLibDAP
mlir_c_runner_utils
)

add_executable(buddy-rfft RFFT.cpp)
add_dependencies(buddy-rfft buddy-opt)
target_link_libraries(buddy-rfft
BuddyLibDAP
mlir_c_runner_utils
)
75 changes: 75 additions & 0 deletions examples/DAPDialect/RFFT.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//===- RFFT.cpp - Example of DAP RFFT Operation ---------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// An example of the RFFT function from the Whisper Preprocessor operation.
//
//===----------------------------------------------------------------------===//

#include <buddy/DAP/DAP.h>
#include <chrono>
#include <fstream>
#include <iostream>

#define testLength 840

using namespace dap;
using namespace std;

// Emit the "[Log] " tag on stdout in bold blue, then reset the terminal
// attributes. Nothing else is printed (no trailing newline).
void printLogLabel() {
  std::cout << "\033[34;1m"
            << "[Log] "
            << "\033[0m";
}

// Write the RFFT results to a text file.
//
// Creates (or truncates) "whisperPreprocessResultRFFT.txt" in the current
// working directory, writes a three-line banner, and then writes the first
// `testLength` elements of `outputMemRef`, one value per line.
//
// If the file cannot be opened, an error is reported on stderr and the
// function returns without writing anything, instead of silently dropping
// every value into a failed stream.
void printResult(MemRef<double, 1> &outputMemRef) {
  std::ofstream fout("whisperPreprocessResultRFFT.txt");
  if (!fout.is_open()) {
    std::cerr << "[Error] Failed to open whisperPreprocessResultRFFT.txt "
                 "for writing!"
              << std::endl;
    return;
  }
  // Print title.
  fout << "-----------------------------------------" << '\n';
  fout << "[ Buddy RFFT Result ]" << '\n';
  fout << "-----------------------------------------" << '\n';
  // Print result data. '\n' is used instead of std::endl so the stream is
  // not flushed after every element; close() below flushes once.
  for (int i = 0; i < testLength; ++i) {
    fout << outputMemRef[i] << '\n';
  }
  fout.close();
}

int main() {
// Print the title of this example.
const std::string title = "RFFT Operation Powered by Buddy Compiler";
std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;

double *inputAlign = new double[testLength];
for (int i = 0; i < testLength; ++i) {
inputAlign[i] = static_cast<double>(i);
}
intptr_t inputSizes[1] = {testLength};
MemRef<double, 1> inputMemRef(inputAlign, inputSizes);

printLogLabel();
std::cout << "Running RFFT operation" << std::endl;
const auto loadStart = std::chrono::high_resolution_clock::now();
dap::RFFT(&inputMemRef);
const auto loadEnd = std::chrono::high_resolution_clock::now();
const std::chrono::duration<double, std::milli> loadTime =
loadEnd - loadStart;
printLogLabel();
std::cout << "RFFT time: " << (double)(loadTime.count()) / 1000
<< "s\n"
<< std::endl;

printResult(inputMemRef);

return 0;
}
20 changes: 10 additions & 10 deletions examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ module {
%current_image_h = arith.constant 4 : index
%current_image_w = arith.constant 4 : index

%current_filter_f = arith.constant 1 : index
%current_filter_f = arith.constant 2 : index
%current_filter_c = arith.constant 2 : index
%current_filter_h = arith.constant 2 : index
%current_filter_w = arith.constant 2 : index
Expand Down Expand Up @@ -71,12 +71,12 @@ module {
}

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 3, 3, 2] strides = [18, 6, 2, 1] data =
// CHECK{LITERAL}: [[[[4, 1],
// CHECK{LITERAL}: [4, 1],
// CHECK{LITERAL}: [4, 1]],
// CHECK{LITERAL}: [[4, 1],
// CHECK{LITERAL}: [4, 1],
// CHECK{LITERAL}: [4, 1]],
// CHECK{LITERAL}: [[4, 1],
// CHECK{LITERAL}: [4, 1],
// CHECK{LITERAL}: [4, 1]]]]
// CHECK{LITERAL}: [[[[4, 5],
// CHECK{LITERAL}: [4, 5],
// CHECK{LITERAL}: [4, 5]],
// CHECK{LITERAL}: [[4, 5],
// CHECK{LITERAL}: [4, 5],
// CHECK{LITERAL}: [4, 5]],
// CHECK{LITERAL}: [[4, 5],
// CHECK{LITERAL}: [4, 5],
// CHECK{LITERAL}: [4, 5]]]]
28 changes: 12 additions & 16 deletions examples/MLIRVector/vector-iteration.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,14 @@ func.func @main() -> i32 {
%load_vec2 = vector.load %mem_pat_1[%i] : memref<10xf32>, vector<4xf32>
%res = arith.addf %load_vec1, %load_vec2 : vector<4xf32>
vector.store %res, %mem_pat_1[%i] : memref<10xf32>, vector<4xf32>
scf.yield %i : index
%i_next = arith.addi %i, %vl_step_pat_1 : index
scf.yield %i_next : index
}
// CHECK: [0, 2, 4, 6, 8, 10, 12, 14, 8, 9]
call @printMemrefF32(%print_mem_pat_1) : (memref<*xf32>) -> ()

// 5. Calculate the position for tail processing.
%tail_idx_pat_1 = arith.addi %iter_idx_pat_1, %vl_step_pat_1 : index

// 6. Process the remainder of the elements with scalar operations.
scf.for %i = %tail_idx_pat_1 to %vl_total_pat_1 step %c1 {
// 5. Process the remainder of the elements with scalar operations.
scf.for %i = %iter_idx_pat_1 to %vl_total_pat_1 step %c1 {
%ele1 = memref.load %mem_pat_1[%i] : memref<10xf32>
%ele2 = memref.load %mem_pat_1[%i] : memref<10xf32>
%res = arith.addf %ele1, %ele2 : f32
Expand Down Expand Up @@ -105,25 +103,23 @@ func.func @main() -> i32 {
%load_vec2 = vector.load %mem_pat_2[%i] : memref<10xf32>, vector<4xf32>
%res = arith.addf %load_vec1, %load_vec2 : vector<4xf32>
vector.store %res, %mem_pat_2[%i] : memref<10xf32>, vector<4xf32>
scf.yield %i : index
%i_next = arith.addi %i, %vl_step_pat_1 : index
scf.yield %i_next : index
}
// CHECK: [0, 2, 4, 6, 8, 10, 12, 14, 8, 9]
call @printMemrefF32(%print_mem_pat_2) : (memref<*xf32>) -> ()

// 5. Calculate the position for tail processing.
%tail_idx_pat_2 = arith.addi %iter_idx_pat_2, %vl_step_pat_2 : index

// 6. Compute the tail size and create mask and pass-through vector for the
// 5. Compute the tail size and create mask and pass-through vector for the
// remaining elements.
%tail_size_pat_2 = arith.subi %vl_total_pat_2, %iter_idx_pat_2 :index
%tail_size_pat_2 = arith.subi %vl_total_pat_2, %iter_idx_pat_2 : index
%mask_pat_2 = vector.create_mask %tail_size_pat_2 : vector<4xi1>
%pass_thr_vec = arith.constant dense<0.> : vector<4xf32>

// 7. Process the remaining elements using masked vector operations.
%ele1 = vector.maskedload %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
%ele2 = vector.maskedload %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
// 6. Process the remaining elements using masked vector operations.
%ele1 = vector.maskedload %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
%ele2 = vector.maskedload %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
%res = arith.addf %ele1, %ele2 : vector<4xf32>
vector.maskedstore %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %res : memref<10xf32>, vector<4xi1>, vector<4xf32>
vector.maskedstore %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %res : memref<10xf32>, vector<4xi1>, vector<4xf32>
// CHECK: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
call @printMemrefF32(%print_mem_pat_2) : (memref<*xf32>) -> ()

Expand Down
21 changes: 21 additions & 0 deletions examples/VectorExpDialect/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,24 @@ vector-exp-dynamic-vector-run:
-L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
-o a.out
@LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out

# Ahead-of-time build of vector-exp-iteration.mlir: lower it with buddy-opt
# through the pass pipeline below, translate the result to LLVM IR (log.ll)
# with buddy-translate, then compile log.ll with clang for the riscv64 Linux
# target (-march=rv64gcv) into a.out, linking the MLIR runner utility libraries.
# This target only produces a.out; it does not execute it.
vector-exp-iteration-aot:
@${BUDDY_OPT} ./vector-exp-iteration.mlir \
-lower-vector-exp \
-lower-affine \
-convert-vector-to-scf \
-convert-scf-to-cf \
-convert-vector-to-llvm \
-convert-index-to-llvm \
-convert-arith-to-llvm \
-convert-func-to-llvm \
-finalize-memref-to-llvm \
-reconcile-unrealized-casts | \
${BUDDY_TRANSLATE} -buddy-to-llvmir -o log.ll
${LOCAL_CLANG} -O3 log.ll \
-march=rv64gcv --target=riscv64-unknown-linux-gnu -fPIC \
--sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot \
--gcc-toolchain=${RISCV_GNU_TOOLCHAIN} \
-L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
-o a.out

Loading

0 comments on commit 41784eb

Please sign in to comment.