Failure in GPU pass pipeline #651

Open
fschlimb opened this issue May 26, 2023 · 1 comment

Labels
bug Something isn't working

@fschlimb (Contributor)
With the following MLIR in out.mlir:

#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0) -> (d0)>
module {
  func.func private @printMemrefI64(tensor<*xi64>)
  func.func private @printMemrefF64(tensor<*xf64>)
  func.func @main() {
    %cst = arith.constant dense<6> : tensor<2xindex>
    %cst_0 = arith.constant 0.000000e+00 : f64
    %c5_i64 = arith.constant 5 : i64
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %0 = tensor.empty() : tensor<1x5xi64>
    %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%0 : tensor<1x5xi64>) {
    ^bb0(%out: i64):
      linalg.yield %c5_i64 : i64
    } -> tensor<1x5xi64>
    %2 = tensor.empty() : tensor<36xi64>
    %3 = linalg.generic {indexing_maps = [#map1], iterator_types = ["parallel"]} outs(%2 : tensor<36xi64>) {
    ^bb0(%out: i64):
      %9 = linalg.index 0 : index
      %10 = arith.index_cast %9 : index to i64
      %11 = arith.sitofp %10 : i64 to f64
      %12 = arith.addf %11, %cst_0 : f64
      %13 = arith.fptosi %12 : f64 to i64
      linalg.yield %13 : i64
    } -> tensor<36xi64>
    %cast = tensor.cast %3 : tensor<36xi64> to tensor<?xi64>
    %reshape = tensor.reshape %cast(%cst) : (tensor<?xi64>, tensor<2xindex>) -> tensor<?x?xi64>
    %dim = tensor.dim %reshape, %c0 : tensor<?x?xi64>
    %dim_1 = tensor.dim %reshape, %c1 : tensor<?x?xi64>
    %4 = tensor.empty(%dim, %dim_1) : tensor<?x?xi64>
    %5 = bufferization.to_memref %4 : memref<?x?xi64>
    %6 = bufferization.to_memref %reshape : memref<?x?xi64, strided<[?, ?], offset: ?>>
    memref.copy %6, %5 : memref<?x?xi64, strided<[?, ?], offset: ?>> to memref<?x?xi64>
    %7 = bufferization.to_memref %1 : memref<1x5xi64>
    %8 = bufferization.to_memref %reshape : memref<?x?xi64, strided<[?, ?], offset: ?>>
    %subview = memref.subview %8[1, 1] [1, 5] [5, 1] : memref<?x?xi64, strided<[?, ?], offset: ?>> to memref<1x5xi64, strided<[?, ?], offset: ?>>
    linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%7 : memref<1x5xi64>) outs(%subview : memref<1x5xi64, strided<[?, ?], offset: ?>>) {
    ^bb0(%in: i64, %out: i64):
      linalg.yield %in : i64
    }
    %cast_2 = tensor.cast %reshape : tensor<?x?xi64> to tensor<*xi64>
    call @printMemrefI64(%cast_2) : (tensor<*xi64>) -> ()
    return
  }
}

the command python bin/imex-runner.py -f ./ptensor-gpu.pp -i ./out.mlir fails with the following message:

./out.mlir:39:5: error: failed to legalize operation 'gpu.func'
    linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%7 : memref<1x5xi64>) outs(%subview : memref<1x5xi64, strided<[?, ?], offset: ?>>) {
    ^
./out.mlir:39:5: note: see current operation: 
"gpu.func"() ({
^bb0(%arg0: memref<1x5xi64, #spirv.storage_class<CrossWorkgroup>>, %arg1: index, %arg2: memref<1x5xi64, strided<[?, 1], offset: ?>, #spirv.storage_class<CrossWorkgroup>>):
  %0 = "gpu.block_id"() {dimension = #gpu<dim y>} : () -> index
  %1 = "memref.load"(%arg0, %arg1, %0) {nontemporal = false} : (memref<1x5xi64, #spirv.storage_class<CrossWorkgroup>>, index, index) -> i64
  "memref.store"(%1, %arg2, %arg1, %0) {nontemporal = false} : (i64, memref<1x5xi64, strided<[?, 1], offset: ?>, #spirv.storage_class<CrossWorkgroup>>, index, index) -> ()
  "gpu.return"() : () -> ()
}) {function_type = (memref<1x5xi64, #spirv.storage_class<CrossWorkgroup>>, index, memref<1x5xi64, strided<[?, 1], offset: ?>, #spirv.storage_class<CrossWorkgroup>>) -> (), gpu.kernel, gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>, sym_name = "main_kernel", workgroup_attributions = 0 : i64} : () -> ()

Here's the content of ptensor-gpu.pp:

builtin.module(
    convert-tensor-to-linalg
    arith-bufferize
    func.func(empty-tensor-to-alloc-tensor
          //eliminate-empty-tensors
          scf-bufferize
          shape-bufferize
          linalg-bufferize
          bufferization-bufferize
          tensor-bufferize)
    func-bufferize
    func.func(finalizing-bufferize
          convert-linalg-to-parallel-loops
          imex-add-outer-parallel-loop
          gpu-map-parallel-loops
          convert-parallel-loops-to-gpu)
// insert-gpu-allocs pass can have client-api = opencl or vulkan args
    func.func(insert-gpu-allocs{client-api=opencl})
    canonicalize
    normalize-memrefs
// Unstride memrefs does not seem to be needed.
//  func.func(unstride-memrefs)
    func.func(lower-affine)
    gpu-kernel-outlining
    canonicalize
    cse
// The following set-spirv-* passes can have client-api = opencl or vulkan args
    set-spirv-capabilities{client-api=opencl}
    gpu.module(set-spirv-abi-attrs{client-api=opencl})
    canonicalize
    fold-memref-alias-ops
    imex-convert-gpu-to-spirv
    spirv.module(spirv-lower-abi-attrs
             spirv-update-vce)
    func.func(llvm-request-c-wrappers)
    serialize-spirv
    convert-gpu-to-gpux
    convert-func-to-llvm
    convert-math-to-llvm
    convert-gpux-to-llvm
    expand-strided-metadata
    lower-affine
    finalize-memref-to-llvm
    reconcile-unrealized-casts)
@silee2 (Contributor) commented Jun 6, 2023

Lowering fails because one of the arguments to gpu.func,
%subview = memref.subview %8[1, 1] [1, 5] [5, 1] : memref<?x?xi64, strided<[?, ?], offset: ?>> to memref<1x5xi64, strided<[?, ?], offset: ?>>
has a dynamic shape.
Implementing #653 should address this issue.
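Until that lands, one possible workaround (a hedged sketch, not tested against this pipeline) is to stage the strided subview through a contiguous temporary, so the kernel only ever sees memrefs with a static layout. The SSA names below reuse the values from out.mlir; the alloc/copy placement is an assumption about where the launch happens:

// Hypothetical workaround sketch: copy the strided subview into a
// contiguous scratch buffer, run the kernel on that buffer, then copy
// the result back. Both kernel-visible memrefs then have static
// layouts, so no gpu.func argument carries dynamic strides.
%tmp = memref.alloc() : memref<1x5xi64>
memref.copy %subview, %tmp
    : memref<1x5xi64, strided<[?, ?], offset: ?>> to memref<1x5xi64>
// ... launch the kernel with %tmp in place of %subview ...
memref.copy %tmp, %subview
    : memref<1x5xi64> to memref<1x5xi64, strided<[?, ?], offset: ?>>
memref.dealloc %tmp : memref<1x5xi64>

This trades an extra host-side copy for legalizable kernel signatures.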
