diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index c64f35542a6e59..426cd52b7ef83e 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -58,7 +58,7 @@ static mlir::Value createKernelArgArray(mlir::Location loc, loc, ptrTy, structTy, argStruct, mlir::ArrayRef({indice})); rewriter.create(loc, arg, structMember); mlir::Value arrayMember = rewriter.create( - loc, ptrTy, structTy, argArray, mlir::ArrayRef({indice})); + loc, ptrTy, ptrTy, argArray, mlir::ArrayRef({indice})); rewriter.create(loc, structMember, arrayMember); } return argArray; diff --git a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir index 7fede7c6c17b78..accdeae30aa61c 100644 --- a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir +++ b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir @@ -99,9 +99,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : ve } // CHECK-LABEL: _QMmod1Phost_sub - +// CHECK: %[[STRUCT:.*]] = llvm.alloca %{{.*}} x !llvm.struct<(ptr)> : (i32) -> !llvm.ptr +// CHECK: %[[PARAMS:.*]] = llvm.alloca %{{.*}} x !llvm.ptr : (i32) -> !llvm.ptr +// CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[STRUCT_PTR:.*]] = llvm.getelementptr %[[STRUCT]][%[[ZERO]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.struct<(ptr)> +// CHECK: llvm.store %{{.*}}, %[[STRUCT_PTR]] : !llvm.ptr, !llvm.ptr +// CHECK: %[[PARAM_PTR:.*]] = llvm.getelementptr %[[PARAMS]][%[[ZERO]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.ptr +// CHECK: llvm.store %[[STRUCT_PTR]], %[[PARAM_PTR]] : !llvm.ptr, !llvm.ptr // CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 : !llvm.ptr -// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}}) +// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}}, %[[PARAMS]], %[[NULL]]) // -----