Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto-inserted deallocation of GPU memory leads to crash #664

Open
fschlimb opened this issue Oct 18, 2023 · 0 comments
Open

Auto-inserted deallocation of GPU memory leads to crash #664

fschlimb opened this issue Oct 18, 2023 · 0 comments
Assignees
Labels
bug Something isn't working

Comments

@fschlimb
Copy link
Contributor

The following IR does not work with the pipeline below: the returned pointer has already been deallocated. It works fine when generation of the gpu-dealloc op/call is omitted.

// RUN: %python_executable %imex_runner --requires=l0-runtime -i %s --pass-pipeline-file=%p/ptensor-gpu.pp --runner imex-cpu-runner -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%levelzero_runtime --filecheck --O3
// RUN: %python_executable %imex_runner --requires=sycl-runtime -i %s --pass-pipeline-file=%p/ptensor-gpu.pp \
// RUN:                                        --runner imex-cpu-runner -e main \
// RUN:                                        --entry-point-result=void \
// RUN:                                        --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%sycl_runtime --filecheck
// Two-dimensional identity map; it is the only indexing map of the
// linalg.generic in @ddpt_jit.
#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  // Print helpers declared without a body. Only @printMemrefI32 is called in
  // this module; @printMemrefF32 is declared but unused here.
  func.func private @printMemrefI32(tensor<*xi32>)
  func.func private @printMemrefF32(tensor<*xf32>)
  // Entry point (selected with `-e main` in the run lines). Calls @ddpt_jit and
  // prints the second of its four results (%0#1) as an unranked i32 tensor.
  func.func @main() {
    %0:4 = call @ddpt_jit() : () -> (memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>)
    // Only result #1 is consumed; results #0, #2 and #3 are not used in @main.
    %1 = bufferization.to_tensor %0#1 : memref<?x?xi32, strided<[?, ?], offset: ?>>
    %cast = tensor.cast %1 : tensor<?x?xi32> to tensor<*xi32>
    call @printMemrefI32(%cast) : (tensor<*xi32>) -> ()
    // The pattern below only matches the printed header line, not the element
    // values. NOTE(review): the character class is `[-9a-f]`, not `[-0-9a-f]`;
    // because it is followed by `*` it may match an empty string - confirm
    // this is intended.
    // CHECK: Unranked Memref base@ = {{(0x)?[-9a-f]*}}
    return
  }
  // Produces four memrefs: three dynamically-shaped, strided 2-D i32 memrefs
  // and one memref<2xindex>. Results #0 and #2 are the same SSA value (%cast).
  // NOTE(review): the issue reports that a returned pointer is already
  // deallocated when the caller uses it; presumably that is the buffer behind
  // result #1 (the only one @main reads) - confirm.
  func.func @ddpt_jit() -> (memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>) attributes {llvm.emit_c_interface} {
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c0_i32 = arith.constant 0 : i32
    // Build a 16x16 i32 tensor in which every element is the constant 0.
    %0 = tensor.empty() : tensor<16x16xi32>
    %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%0 : tensor<16x16xi32>) {
    ^bb0(%out: i32):
      // The incoming element (%out) is ignored; the constant is yielded instead.
      linalg.yield %c0_i32 : i32
    } -> tensor<16x16xi32>
    // Zero-sized 0x0 tensor; its memref is returned as results #0 and #2.
    %2 = tensor.empty() : tensor<0x0xi32>
    %3 = bufferization.to_memref %2 : memref<0x0xi32>
    %cast = memref.cast %3 : memref<0x0xi32> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    // Memref view of the zero-filled 16x16 tensor; returned as result #1.
    %4 = bufferization.to_memref %1 : memref<16x16xi32>
    %cast_0 = memref.cast %4 : memref<16x16xi32> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    // Two-element index buffer with both entries set to 0; returned as result #3.
    %alloc = memref.alloc() {alignment = 8 : i64} : memref<2xindex>
    memref.store %c0, %alloc[%c0] : memref<2xindex>
    memref.store %c0, %alloc[%c1] : memref<2xindex>
    return %cast, %cast_0, %cast, %alloc : memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>
  }
}
builtin.module(
// Passes written as func.func(...), gpu.module(...) or spirv.module(...) are
// nested on those ops; bare names are scheduled on the enclosing module.
// TOSA lowering
    func.func(tosa-make-broadcastable)
    func.func(tosa-to-linalg)
    func.func(tosa-to-tensor)
    canonicalize
    linalg-fuse-elementwise-ops
    arith-expand
    memref-expand
// Bufferization
    arith-bufferize
    func-bufferize
    func.func(empty-tensor-to-alloc-tensor)
    func.func(scf-bufferize)
    func.func(tensor-bufferize)
    func.func(bufferization-bufferize)
    func.func(linalg-bufferize)
    func.func(linalg-detensorize)
    func.func(tensor-bufferize)
    func.func(finalizing-bufferize)
    imex-remove-temporaries
    func.func(convert-linalg-to-parallel-loops)
    func.func(scf-parallel-loop-fusion)
// GPU
    func.func(imex-add-outer-parallel-loop)
    func.func(gpu-map-parallel-loops)
    func.func(convert-parallel-loops-to-gpu)
// insert-gpu-allocs pass can have client-api = opencl or vulkan args
// NOTE(review): presumably this is the pass that emits the gpu-dealloc the
// issue describes as freeing a returned buffer - confirm.
    func.func(insert-gpu-allocs{client-api=opencl})
    canonicalize
    normalize-memrefs
// Unstride memrefs does not seem to be needed.
//  func.func(unstride-memrefs)
    func.func(lower-affine)
    gpu-kernel-outlining
    canonicalize
    cse
// The following set-spirv-* passes can have client-api = opencl or vulkan args
    set-spirv-capabilities{client-api=opencl}
    gpu.module(set-spirv-abi-attrs{client-api=opencl})
    canonicalize
    fold-memref-alias-ops
    imex-convert-gpu-to-spirv
    spirv.module(spirv-lower-abi-attrs
             spirv-update-vce)
    func.func(llvm-request-c-wrappers)
    serialize-spirv
// Conversion to the gpux and LLVM dialects
    convert-gpu-to-gpux
    convert-func-to-llvm
    convert-math-to-llvm
    convert-gpux-to-llvm
    expand-strided-metadata
    lower-affine
    finalize-memref-to-llvm
    reconcile-unrealized-casts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

3 participants