[examples] Add embedding example.
zhanghb97 committed Nov 24, 2024
1 parent 6966512 commit dffc6c7
Showing 2 changed files with 121 additions and 0 deletions.
44 changes: 44 additions & 0 deletions examples/BuddyNext/makefile
@@ -312,3 +312,47 @@ next-sgemm-run:
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

next-embedding-lower:
@${MLIR_OPT} ./next-embedding.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
${MLIR_OPT} \
-arith-expand \
-eliminate-empty-tensors \
-empty-tensor-to-alloc-tensor \
-one-shot-bufferize \
-convert-linalg-to-affine-loops \
-o log.mlir

next-embedding-run:
@${MLIR_OPT} ./next-embedding.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
${MLIR_OPT} \
-arith-expand \
-eliminate-empty-tensors \
-empty-tensor-to-alloc-tensor \
-one-shot-bufferize \
-convert-linalg-to-affine-loops \
-affine-loop-fusion \
-lower-affine \
-func-bufferize \
-arith-bufferize \
-tensor-bufferize \
-buffer-deallocation \
-finalizing-bufferize \
-convert-vector-to-scf \
-expand-strided-metadata \
-convert-vector-to-llvm \
-memref-expand \
-arith-expand \
-convert-arith-to-llvm \
-finalize-memref-to-llvm \
-convert-scf-to-cf \
-convert-openmp-to-llvm \
-convert-arith-to-llvm \
-convert-math-to-llvm \
-convert-math-to-libm \
-convert-func-to-llvm \
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
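
Both new targets follow the pattern of the existing next-* rules above and reuse the MLIR_OPT, MLIR_CPU_RUNNER, OPT_FLAG, and runner-library variables defined earlier in the makefile. Assuming a configured Buddy Compiler build, a typical invocation from the examples/BuddyNext directory would be:

make next-embedding-lower    # stop after -convert-linalg-to-affine-loops and write the result to log.mlir
make next-embedding-run      # lower to the LLVM dialect and execute through the MLIR CPU runner
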
77 changes: 77 additions & 0 deletions examples/BuddyNext/next-embedding.mlir
@@ -0,0 +1,77 @@
// RUN: buddy-opt %s \
// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
// RUN: | buddy-opt \
// RUN: -arith-expand \
// RUN: -eliminate-empty-tensors \
// RUN: -empty-tensor-to-alloc-tensor \
// RUN: -one-shot-bufferize \
// RUN: -convert-linalg-to-affine-loops \
// RUN: -affine-loop-fusion \
// RUN: -lower-affine \
// RUN: -func-bufferize \
// RUN: -arith-bufferize \
// RUN: -tensor-bufferize \
// RUN: -buffer-deallocation \
// RUN: -finalizing-bufferize \
// RUN: -convert-vector-to-scf \
// RUN: -expand-strided-metadata \
// RUN: -convert-vector-to-llvm \
// RUN: -memref-expand \
// RUN: -arith-expand \
// RUN: -convert-arith-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-scf-to-cf \
// RUN: -convert-openmp-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-math-to-llvm \
// RUN: -convert-math-to-libm \
// RUN: -convert-func-to-llvm \
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s

func.func private @rtclock() -> f64

func.func @kernel(%t0: tensor<32000x4096xf32>, %t1: tensor<1x40xi64>) {
%t_start = call @rtclock() : () -> f64

%0 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]> : tensor<40xi64>}> : () -> tensor<40xi64>
%1 = tosa.reshape %0 {new_shape = array<i64: 1, 40>} : (tensor<40xi64>) -> tensor<1x40xi64>
%2 = tosa.reshape %1 {new_shape = array<i64: 1, 40>} : (tensor<1x40xi64>) -> tensor<1x40xi64>
%3 = tosa.cast %t1 : (tensor<1x40xi64>) -> tensor<1x40xi32>
// Reshape the table to 1x32000x4096 and gather the 40 rows selected by the
// indices: this pair of ops is the embedding lookup.
%4 = tosa.reshape %t0 {new_shape = array<i64: 1, 32000, 4096>} : (tensor<32000x4096xf32>) -> tensor<1x32000x4096xf32>
%5 = tosa.gather %4, %3 : (tensor<1x32000x4096xf32>, tensor<1x40xi32>) -> tensor<1x40x4096xf32>
%6 = tosa.reshape %5 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>

%t_end = call @rtclock() : () -> f64
%time = arith.subf %t_end, %t_start : f64

%tensor_unranked = tensor.cast %6 : tensor<1x40x4096xf32> to tensor<*xf32>

// All elements of the MemRef are the same, so checking the first line
// is enough to verify correctness.
// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [3{{(, 3)*}}],

// Print results.
call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
// Print timings.
vector.print %time : f64

return
}

func.func @main() {

%c0 = arith.constant dense<3.0> : tensor<32000x4096xf32>
%c1 = arith.constant dense<1> : tensor<1x40xi64>

call @kernel(%c0, %c1) : (tensor<32000x4096xf32>, tensor<1x40xi64>) -> ()

return
}
func.func private @printMemrefF32(%ptr : tensor<*xf32>)
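
For reference, the kernel above is a batched table lookup: the 32000x4096 table gets a leading batch dimension and tosa.gather selects the 40 rows named by the index tensor, so with a table of all 3.0 the result is a 1x40x4096 tensor of 3.0. A minimal NumPy sketch of the same computation, with illustrative names (embedding_lookup, table, indices) that are not part of the example:

import numpy as np

def embedding_lookup(table, indices):
    # tosa.gather semantics: values of shape (N, K, C) gathered with indices of
    # shape (N, W) produce an (N, W, C) result; here N=1, K=32000, W=40, C=4096.
    batched = table.reshape(1, *table.shape)        # tosa.reshape to 1x32000x4096
    idx = indices.astype(np.int32)                  # tosa.cast i64 -> i32
    return np.take_along_axis(batched, idx[:, :, None], axis=1)

table = np.full((32000, 4096), 3.0, dtype=np.float32)   # arith.constant dense<3.0>
indices = np.ones((1, 40), dtype=np.int64)               # arith.constant dense<1>
out = embedding_lookup(table, indices)
print(out.shape)       # (1, 40, 4096)
print(np.unique(out))  # [3.], matching the CHECK lines above
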
