[examples] Set an example for vector dialect to RVV asm.

buddy-compiler · Jan 7, 2025 · 91bbd57 · 91bbd57
1 parent 9e9ea47
commit 91bbd57
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 20 deletions.
diff --git a/examples/MLIRVector/makefile b/examples/MLIRVector/makefile
@@ -1,10 +1,17 @@
 #!/bin/bash
+MLIR_BUILD_DIR := ../../llvm/build
+BUDDY_MLIR_BUILD_DIR := ../../build
 BUDDY_OPT := ../../build/bin/buddy-opt
 MLIR_OPT := ../../llvm/build/bin/mlir-opt
 MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
 MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
 LLC := ../../llvm/build/bin/llc
 OPT_FLAG := -O0
+LOCAL_CLANG := ${MLIR_BUILD_DIR}/bin/clang
+
+# RISC-V GNU toolchain and its sysroot, under the buddy build dir (build-dir variables carry no trailing slash).
+RISCV_GNU_TOOLCHAIN := ${BUDDY_MLIR_BUILD_DIR}/thirdparty/riscv-gnu-toolchain
+RISCV_GNU_TOOLCHAIN_SYSROOT := ${RISCV_GNU_TOOLCHAIN}/sysroot
 
 ifeq ($(shell uname),Linux)
 MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
@@ -32,6 +39,18 @@ vector-load-translate:
 		--reconcile-unrealized-casts | \
 	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll
 
+# Lower vector-load.mlir to LLVM IR (log.ll), then have clang emit RISC-V
+# vector (rv64gcv) assembly into log.s; -S alone selects assembly output.
+vector-load-asm-rvv:
+	@${MLIR_OPT} ./vector-load.mlir \
+		--convert-vector-to-scf --lower-affine --convert-scf-to-cf \
+		--convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \
+		--reconcile-unrealized-casts | \
+	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll
+	@${LOCAL_CLANG} log.ll -march=rv64gcv --target=riscv64-unknown-linux-gnu \
+		--sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT} --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} \
+		-fno-inline -O3 -S -o log.s
+
 run-targets += vector-load-run
 vector-load-run:
 	@${MLIR_OPT} ./vector-load.mlir \
@@ -298,7 +317,7 @@ vector-splat-run:
 		-split-input-file -verify-diagnostics \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}		
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-insert-lower:
 	@${MLIR_OPT} ./vector-insert.mlir \
@@ -321,8 +340,8 @@ vector-insert-run:
 		-split-input-file -verify-diagnostics \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}		
-	
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
 vector-reduction-lower:
 	@${MLIR_OPT} ./vector-reduction.mlir \
 		--convert-vector-to-scf --lower-affine --convert-scf-to-cf \
@@ -344,7 +363,7 @@ vector-reduction-run:
 		-split-input-file -verify-diagnostics \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}	
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-outerproduct-lower:
 	@${MLIR_OPT} ./vector-outerproduct.mlir \
@@ -367,7 +386,7 @@ vector-outerproduct-run:
 		-split-input-file -verify-diagnostics \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}	
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-create-mask-lower:
 	@${MLIR_OPT} ./vector-create-mask.mlir \
@@ -389,7 +408,7 @@ vector-create-mask-run:
 		--convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}	
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-extract-lower:
 	@${MLIR_OPT} ./vector-extract.mlir \
@@ -502,7 +521,7 @@ vector-constant-mask-run:
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
 		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
-	
+
 vector-expandload-lower:
 	@${MLIR_OPT} ./vector-expandload.mlir \
 		--convert-vector-to-scf --lower-affine --convert-scf-to-cf \
@@ -523,7 +542,7 @@ vector-expandload-run:
 		--convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}	
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-compressstore-lower:
 	@${MLIR_OPT} ./vector-compressstore.mlir \
@@ -567,7 +586,7 @@ vector-insert-strided-slice-run:
 		--convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \
 		--reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \
-		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}		
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
 
 vector-scatter-lower:
 	@${MLIR_OPT} ./vector-scatter.mlir \

diff --git a/examples/MLIRVector/vector-load.mlir b/examples/MLIRVector/vector-load.mlir
@@ -16,6 +16,15 @@ memref.global "private" @gv1 : memref<4x4xi32> = dense<[[0, 1, 2, 3],
 
 memref.global "private" @gv2 : memref<8xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7]>
 
+func.func @kernel_1(%arg0: memref<8xi32>) {
+  %start = arith.constant 0 : index
+  // Basic case: read three i32 elements of %arg0 starting at index 0.
+  %loaded = vector.load %arg0[%start] : memref<8xi32>, vector<3xi32>
+  // CHECK: ( 0, 1, 2 )
+  vector.print %loaded : vector<3xi32>
+  return
+}
+
 func.func @main() -> i32 {
   // vector.load can load n-D vector from m-D scalar memref or k-D vector memref
 
@@ -30,12 +39,7 @@ func.func @main() -> i32 {
   %base1 = memref.get_global @gv1 : memref<4x4xi32>
   %base2 = memref.get_global @gv2 : memref<8xi32>
 
-
-  // load normal usage
-  %v0 = vector.load %base0[%c0] : memref<8xi32>, vector<3xi32>
-  // CHECK: ( 0, 1, 2 )
-  vector.print %v0 : vector<3xi32>
-
+  call @kernel_1(%base0) : (memref<8xi32>) -> ()
 
   // load with m-D memref
   //  case 1: inside inner-most dimension
@@ -82,14 +86,14 @@ func.func @main() -> i32 {
   %v5 = vector.load %base5[%c1, %c1] : memref<?x?xi32>, vector<8xi32>
   // ( 5, 6, 7, 8, 9, 10, 11, 12 )
   vector.print %v5 : vector<8xi32>
-  
+
 
   // load with dynamic memref
   //    case 2: out of bound
   // The document says:
-  //    Representation-wise, the ‘vector.load’ operation permits out-of-bounds reads. 
-  //    Support and implementation of out-of-bounds vector loads is target-specific. 
-  //    No assumptions should be made on the value of elements loaded out of bounds. 
+  //    Representation-wise, the ‘vector.load’ operation permits out-of-bounds reads.
+  //    Support and implementation of out-of-bounds vector loads is target-specific.
+  //    No assumptions should be made on the value of elements loaded out of bounds.
   //    Not all targets may support out-of-bounds vector loads.
   %v6 = vector.load %base5[%c3, %c1] : memref<?x?xi32>, vector<8xi32>
   // ( 13, 14, 15, 0, 1, 2, 3, 4 )
@@ -98,7 +102,7 @@ func.func @main() -> i32 {
 
   // load with unranked memref is not allowed
   %base6 = memref.cast %base1 : memref<4x4xi32> to memref<*xi32>
-  // %v7 = vector.load %base6[%c0, %c0] : memref<*xi32>, vector<8xi32> 
+  // %v7 = vector.load %base6[%c0, %c0] : memref<*xi32>, vector<8xi32>
 
   %ret = arith.constant 0 : i32
   return %ret : i32