Merge branch 'buddy-compiler:main' into main

buddy-compiler · Oct 23, 2024 · 41784eb · 41784eb
2 parents 8a0989d + 2b2a8df
commit 41784eb
Show file tree

Hide file tree

Showing 26 changed files with 3,200 additions and 496 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -106,6 +106,11 @@ if(BUDDY_MLIR_ENABLE_DIP_LIB)
   find_package(PNG REQUIRED)
 endif()
 
+if(BUDDY_ENABLE_PNG)
+  add_definitions(-DBUDDY_ENABLE_PNG)
+  find_package(PNG REQUIRED)
+endif()
+
 # Generate libraries into `lib` of build directory.
 set(LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
 

diff --git a/README.md b/README.md
@@ -103,6 +103,20 @@ $ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
 $ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
 ```
 
+To configure the build environment for using image processing libraries, follow these steps:
+
+```
+$ cmake -G Ninja .. \
+    -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
+    -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
+    -DLLVM_ENABLE_ASSERTIONS=ON \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DBUDDY_MLIR_ENABLE_DIP_LIB=ON \
+    -DBUDDY_ENABLE_PNG=ON
+$ ninja
+$ ninja check-buddy
+```
+
 To build buddy-mlir with custom LLVM sources:
 
 ```

diff --git a/examples/BuddyMobileNetV3/CMakeLists.txt b/examples/BuddyMobileNetV3/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_custom_command(
   OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg0.data
-         ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg1.data
          ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/forward.mlir
          ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/subgraph0.mlir
   COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/buddy-mobilenetv3-import.py

diff --git a/examples/BuddyMobileNetV3/README.md b/examples/BuddyMobileNetV3/README.md
@@ -16,7 +16,8 @@ $ cmake -G Ninja .. \
     -DCMAKE_BUILD_TYPE=RELEASE \
     -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
     -DPython3_EXECUTABLE=$(which python3) \
-    -DBUDDY_MLIR_ENABLE_DIP_LIB=ON
+    -DBUDDY_MLIR_ENABLE_DIP_LIB=ON \
+    -DBUDDY_ENABLE_PNG=ON
 $ ninja
 $ ninja check-buddy
 ```

diff --git a/examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py b/examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py
@@ -38,9 +38,17 @@
         "The environment variable 'MOBILENETV3_MODEL_PATH' is not set or is invalid."
     )
 
-model = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1, pretrained=True)
+model = models.mobilenet_v3_small(
+    weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1, pretrained=True
+)
 model = model.eval()
 
+# Remove the num_batches_tracked attribute.
+for layer in model.modules():
+    if isinstance(layer, torch.nn.BatchNorm2d):
+        if hasattr(layer, "num_batches_tracked"):
+            del layer.num_batches_tracked
+
 # Initialize Dynamo Compiler with specific configurations as an importer.
 dynamo_compiler = DynamoCompiler(
     primary_registry=tosa.ops_registry,
@@ -68,11 +76,10 @@
 
 
 float32_param = np.concatenate(
-    [param.detach().numpy().reshape([-1]) for param in params if param.dtype == torch.float32]
+    [
+        param.detach().numpy().reshape([-1])
+        for param in params
+        if param.dtype == torch.float32
+    ]
 )
 float32_param.tofile(Path(current_path) / "arg0.data")
-
-int64_param = np.concatenate(
-    [param.detach().numpy().reshape([-1]) for param in params if param.dtype == torch.int64]
-)
-int64_param.tofile(Path(current_path) / "arg1.data")
diff --git a/examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp b/examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp
@@ -33,43 +33,43 @@ const std::string ImgName = "dog.png";
 // Declare the mobilenet C interface.
 extern "C" void _mlir_ciface_forward(MemRef<float, 2> *output,
                                      MemRef<float, 1> *arg0,
-                                     MemRef<long long, 1> *arg1,
                                      MemRef<float, 4> *input);
 
 /// Print [Log] label in bold blue format.
 void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
 
-void loadParameters(const std::string &floatParamPath,
-                    const std::string &int64ParamPath,
-                    MemRef<float, 1> &floatParam,
-                    MemRef<long long, 1> &int64Param) {
-  std::ifstream floatParamFile(floatParamPath, std::ios::in | std::ios::binary);
-  if (!floatParamFile.is_open()) {
-    std::string errMsg = "Failed to open float param file: " +
-                         std::filesystem::canonical(floatParamPath).string();
-    throw std::runtime_error(errMsg);
+/// Load parameters into data container.
+void loadParameters(const std::string &paramFilePath,
+                    MemRef<float, 1> &params) {
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  // Open the parameter file in binary mode.
+  std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary);
+  if (!paramFile.is_open()) {
+    throw std::runtime_error("[Error] Failed to open params file!");
   }
-  floatParamFile.read(reinterpret_cast<char *>(floatParam.getData()),
-                      floatParam.getSize() * sizeof(float));
-  if (floatParamFile.fail()) {
-    throw std::runtime_error("Failed to read float param file");
+  printLogLabel();
+  std::cout << "Loading params..." << std::endl;
+  printLogLabel();
+  // Print the canonical path of the parameter file.
+  std::cout << "Params file: " << std::filesystem::canonical(paramFilePath)
+            << std::endl;
+  // Read the parameter data into the provided memory reference.
+  paramFile.read(reinterpret_cast<char *>(params.getData()),
+                 sizeof(float) * (params.getSize()));
+  if (paramFile.fail()) {
+    throw std::runtime_error("Error occurred while reading params file!");
   }
-  floatParamFile.close();
-
-  std::ifstream int64ParamFile(int64ParamPath, std::ios::in | std::ios::binary);
-  if (!int64ParamFile.is_open()) {
-    std::string errMsg = "Failed to open int64 param file: " +
-                         std::filesystem::canonical(int64ParamPath).string();
-    throw std::runtime_error(errMsg);
-  }
-  int64ParamFile.read(reinterpret_cast<char *>(int64Param.getData()),
-                      int64Param.getSize() * sizeof(long long));
-  if (int64ParamFile.fail()) {
-    throw std::runtime_error("Failed to read int64 param file");
-  }
-  int64ParamFile.close();
+  paramFile.close();
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Params load time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
 }
 
+
 // Softmax function.
 void softmax(float *input, size_t size) {
   size_t i;
@@ -124,13 +124,10 @@ int main() {
 
   // Load model parameters from the specified file.
   std::string paramsDir = mobilenetDir + "/arg0.data";
-  std::string intDir = mobilenetDir + "/arg1.data";
-  MemRef<float, 1> paramsContainerf32({ParamsSize});
-  MemRef<long long, 1> ParamsContainerInt64({34});
-  loadParameters(paramsDir, intDir, paramsContainerf32, ParamsContainerInt64);
+  MemRef<float, 1> paramsContainer({ParamsSize});
+  loadParameters(paramsDir, paramsContainer);
   // Call the forward function of the model.
-  _mlir_ciface_forward(&output, &paramsContainerf32, &ParamsContainerInt64,
-                       &inputResize);
+  _mlir_ciface_forward(&output, &paramsContainer, &inputResize);
 
   auto out = output.getData();
   softmax(out, 1000);

diff --git a/examples/DAPDialect/CMakeLists.txt b/examples/DAPDialect/CMakeLists.txt
@@ -62,3 +62,10 @@ target_link_libraries(buddy-whisper-preprocess
   BuddyLibDAP
   mlir_c_runner_utils
 )
+
+add_executable(buddy-rfft RFFT.cpp)
+add_dependencies(buddy-rfft buddy-opt)
+target_link_libraries(buddy-rfft
+  BuddyLibDAP
+  mlir_c_runner_utils
+)
diff --git a/examples/DAPDialect/RFFT.cpp b/examples/DAPDialect/RFFT.cpp
@@ -0,0 +1,75 @@
+//===- RFFT.cpp - Example of DAP RFFT Operation ---------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// An example of the RFFT function from the Whisper preprocessor operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include <buddy/DAP/DAP.h>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+
+#define testLength 840
+
+using namespace dap;
+using namespace std;
+
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+// Write preprocessing results to a text file.
+void printResult(MemRef<double, 1> &outputMemRef) {
+  ofstream fout("whisperPreprocessResultRFFT.txt");
+  // Print title.
+  fout << "-----------------------------------------" << std::endl;
+  fout << "[ Buddy RFFT Result ]" << std::endl;
+  fout << "-----------------------------------------" << std::endl;
+  // Print result data.
+  for (int i = 0; i < testLength; ++i) {
+    fout << outputMemRef[i] << std::endl;
+  }
+  fout.close();
+}
+
+int main() {
+  // Print the title of this example.
+  const std::string title = "RFFT Operation Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  double *inputAlign = new double[testLength];
+  for (int i = 0; i < testLength; ++i) {
+    inputAlign[i] = static_cast<double>(i);
+  }
+  intptr_t inputSizes[1] = {testLength};
+  MemRef<double, 1> inputMemRef(inputAlign, inputSizes);
+
+  printLogLabel();
+  std::cout << "Running RFFT operation" << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::RFFT(&inputMemRef);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "RFFT time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+
+  printResult(inputMemRef);
+
+  return 0;
+}
diff --git a/examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir b/examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir
@@ -40,7 +40,7 @@ module {
     %current_image_h = arith.constant 4 : index
     %current_image_w = arith.constant 4 : index
 
-    %current_filter_f = arith.constant 1 : index
+    %current_filter_f = arith.constant 2 : index
     %current_filter_c = arith.constant 2 : index
     %current_filter_h = arith.constant 2 : index
     %current_filter_w = arith.constant 2 : index
@@ -71,12 +71,12 @@ module {
 }
 
 // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 3, 3, 2] strides = [18, 6, 2, 1] data = 
-// CHECK{LITERAL}: [[[[4,     1], 
-// CHECK{LITERAL}:    [4,     1], 
-// CHECK{LITERAL}:    [4,     1]], 
-// CHECK{LITERAL}:   [[4,     1], 
-// CHECK{LITERAL}:    [4,     1], 
-// CHECK{LITERAL}:    [4,     1]], 
-// CHECK{LITERAL}:   [[4,     1], 
-// CHECK{LITERAL}:    [4,     1], 
-// CHECK{LITERAL}:    [4,     1]]]]
+// CHECK{LITERAL}: [[[[4,     5], 
+// CHECK{LITERAL}:    [4,     5], 
+// CHECK{LITERAL}:    [4,     5]], 
+// CHECK{LITERAL}:   [[4,     5], 
+// CHECK{LITERAL}:    [4,     5], 
+// CHECK{LITERAL}:    [4,     5]], 
+// CHECK{LITERAL}:   [[4,     5], 
+// CHECK{LITERAL}:    [4,     5], 
+// CHECK{LITERAL}:    [4,     5]]]]
diff --git a/examples/MLIRVector/vector-iteration.mlir b/examples/MLIRVector/vector-iteration.mlir
@@ -60,16 +60,14 @@ func.func @main() -> i32 {
     %load_vec2 = vector.load %mem_pat_1[%i] : memref<10xf32>, vector<4xf32>
     %res = arith.addf %load_vec1, %load_vec2 : vector<4xf32>
     vector.store %res, %mem_pat_1[%i] : memref<10xf32>, vector<4xf32>
-    scf.yield %i : index
+    %i_next = arith.addi %i, %vl_step_pat_1 : index
+    scf.yield %i_next : index
   }
   // CHECK: [0,  2,  4,  6,  8,  10,  12,  14,  8,  9]
   call @printMemrefF32(%print_mem_pat_1) : (memref<*xf32>) -> ()
 
-  // 5. Calculate the position for tail processing.
-  %tail_idx_pat_1 = arith.addi %iter_idx_pat_1, %vl_step_pat_1 : index
-
-  // 6. Process the remainder of the elements with scalar operations.
-  scf.for %i = %tail_idx_pat_1 to %vl_total_pat_1 step %c1 {
+  // 5. Process the remainder of the elements with scalar operations.
+  scf.for %i = %iter_idx_pat_1 to %vl_total_pat_1 step %c1 {
     %ele1 = memref.load %mem_pat_1[%i] : memref<10xf32>
     %ele2 = memref.load %mem_pat_1[%i] : memref<10xf32>
     %res = arith.addf %ele1, %ele2 : f32
@@ -105,25 +103,23 @@ func.func @main() -> i32 {
     %load_vec2 = vector.load %mem_pat_2[%i] : memref<10xf32>, vector<4xf32>
     %res = arith.addf %load_vec1, %load_vec2 : vector<4xf32>
     vector.store %res, %mem_pat_2[%i] : memref<10xf32>, vector<4xf32>
-    scf.yield %i : index
+    %i_next = arith.addi %i, %vl_step_pat_1 : index
+    scf.yield %i_next : index
   }
   // CHECK: [0,  2,  4,  6,  8,  10,  12,  14,  8,  9]
   call @printMemrefF32(%print_mem_pat_2) : (memref<*xf32>) -> ()
 
-  // 5. Calculate the position for tail processing.
-  %tail_idx_pat_2 = arith.addi %iter_idx_pat_2, %vl_step_pat_2 : index
-
-  // 6. Compute the tail size and create mask and pass-through vector for the
+  // 5. Compute the tail size and create mask and pass-through vector for the
   //    remaining elements.
-  %tail_size_pat_2 = arith.subi %vl_total_pat_2, %iter_idx_pat_2 :index
+  %tail_size_pat_2 = arith.subi %vl_total_pat_2, %iter_idx_pat_2 : index
   %mask_pat_2 = vector.create_mask %tail_size_pat_2 : vector<4xi1>
   %pass_thr_vec = arith.constant dense<0.> : vector<4xf32>
 
-  // 7. Process the remaining elements using masked vector operations.
-  %ele1 = vector.maskedload %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
-  %ele2 = vector.maskedload %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
+  // 6. Process the remaining elements using masked vector operations.
+  %ele1 = vector.maskedload %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
+  %ele2 = vector.maskedload %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %pass_thr_vec : memref<10xf32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
   %res = arith.addf %ele1, %ele2 : vector<4xf32>
-  vector.maskedstore %mem_pat_2[%tail_idx_pat_2], %mask_pat_2, %res : memref<10xf32>, vector<4xi1>, vector<4xf32>
+  vector.maskedstore %mem_pat_2[%iter_idx_pat_2], %mask_pat_2, %res : memref<10xf32>, vector<4xi1>, vector<4xf32>
   // CHECK: [0,  2,  4,  6,  8,  10,  12,  14,  16,  18]
   call @printMemrefF32(%print_mem_pat_2) : (memref<*xf32>) -> ()
 

diff --git a/examples/VectorExpDialect/makefile b/examples/VectorExpDialect/makefile
@@ -319,3 +319,24 @@ vector-exp-dynamic-vector-run:
 		-L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
 		-o a.out
 	@LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out
+
+vector-exp-iteration-aot:
+	@${BUDDY_OPT} ./vector-exp-iteration.mlir \
+		-lower-vector-exp \
+		-lower-affine \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-index-to-llvm \
+		-convert-arith-to-llvm \
+		-convert-func-to-llvm \
+		-finalize-memref-to-llvm \
+		-reconcile-unrealized-casts | \
+	${BUDDY_TRANSLATE} -buddy-to-llvmir -o log.ll
+	${LOCAL_CLANG} -O3 log.ll \
+		-march=rv64gcv --target=riscv64-unknown-linux-gnu -fPIC \
+		--sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot \
+		--gcc-toolchain=${RISCV_GNU_TOOLCHAIN} \
+		-L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
+		-o a.out
+