diff --git a/test/Conversion/GPUToGPUX/gpux-alloc-dealloc.mlir b/test/Conversion/GPUToGPUX/gpux-alloc-dealloc.mlir index def552f0d..f99f8f61b 100644 --- a/test/Conversion/GPUToGPUX/gpux-alloc-dealloc.mlir +++ b/test/Conversion/GPUToGPUX/gpux-alloc-dealloc.mlir @@ -4,18 +4,18 @@ func.func @main() attributes {llvm.emit_c_interface} { %c8 = arith.constant 8 : index %c1 = arith.constant 1 : index // CHECK: %[[STREAM:.*]] = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: %[[ALLOC_0:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_0:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref = gpu.alloc () : memref<8xf32> - // CHECK: %[[ALLOC_1:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_1:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref_1 = gpu.alloc () : memref<8xf32> - // CHECK: %[[ALLOC_2:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_2:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref_2 = gpu.alloc () : memref<8xf32> - // CHECK: "gpux.dealloc"(%[[STREAM:.*]], %[[ALLOC_0:.*]]) : (!gpux.StreamType, memref<8xf32>) -> () + // CHECK: "gpux.dealloc"(%[[STREAM]], %[[ALLOC_0]]) : (!gpux.StreamType, memref<8xf32>) -> () gpu.dealloc %memref : memref<8xf32> - // CHECK: "gpux.dealloc"(%[[STREAM:.*]], %[[ALLOC_1:.*]]) : (!gpux.StreamType, memref<8xf32>) -> () + // CHECK: "gpux.dealloc"(%[[STREAM]], %[[ALLOC_1]]) : (!gpux.StreamType, memref<8xf32>) -> () gpu.dealloc %memref_1 : memref<8xf32> - // CHECK: "gpux.dealloc"(%[[STREAM:.*]], %[[ALLOC_2:.*]]) : (!gpux.StreamType, memref<8xf32>) -> () + // CHECK: "gpux.dealloc"(%[[STREAM]], %[[ALLOC_2]]) : (!gpux.StreamType, memref<8xf32>) -> () gpu.dealloc %memref_2 : memref<8xf32> - // CHECK: "gpux.destroy_stream"(%[[STREAM:.*]]) : (!gpux.StreamType) -> () + // CHECK: "gpux.destroy_stream"(%[[STREAM]]) : (!gpux.StreamType) -> () return } diff --git a/test/Conversion/GPUToGPUX/gpux-launch-func.mlir b/test/Conversion/GPUToGPUX/gpux-launch-func.mlir index 785b9cf70..1bb6dc59c 100644 --- a/test/Conversion/GPUToGPUX/gpux-launch-func.mlir +++ b/test/Conversion/GPUToGPUX/gpux-launch-func.mlir @@ -11,18 +11,18 @@ func.func @main() attributes {llvm.emit_c_interface} { // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C8:.*]] = arith.constant 8 : index // CHECK: %[[STREAM:.*]] = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: %[[ALLOC_0:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_0:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref = gpu.alloc () : memref<8xf32> - // CHECK: %[[ALLOC_1:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_1:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref_1 = gpu.alloc () : memref<8xf32> - // CHECK: %[[ALLOC_2:.*]] = "gpux.alloc"(%[[STREAM:.*]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> + // CHECK: %[[ALLOC_2:.*]] = "gpux.alloc"(%[[STREAM]]) <{operandSegmentSizes = array}> : (!gpux.StreamType) -> memref<8xf32> %memref_2 = gpu.alloc () : memref<8xf32> - // CHECK: "gpux.launch_func"(%[[STREAM:.*]], %[[C8:.*]], %[[C1:.*]], %[[C1:.*]], %[[C1:.*]], %[[C1:.*]], %[[C1:.*]], %[[ALLOC_0:.*]], %[[ALLOC_1:.*]], %[[ALLOC_2:.*]]) <{kernel = @Kernels::@kernel_1, operandSegmentSizes = array}> : (!gpux.StreamType, index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> () + // CHECK: "gpux.launch_func"(%[[STREAM]], %[[C8]], %[[C1]], %[[C1]], %[[C1]], %[[C1]], %[[C1]], %[[ALLOC_0]], %[[ALLOC_1]], %[[ALLOC_2]]) <{kernel = @Kernels::@kernel_1, operandSegmentSizes = array}> : (!gpux.StreamType, index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> () gpu.launch_func @Kernels::@kernel_1 blocks in (%c8, %c1, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<8xf32>, %memref_1 : memref<8xf32>, %memref_2 : memref<8xf32>) gpu.dealloc %memref : memref<8xf32> gpu.dealloc %memref_1 : memref<8xf32> gpu.dealloc %memref_2 : memref<8xf32> - // CHECK: "gpux.destroy_stream"(%[[STREAM:.*]]) : (!gpux.StreamType) -> () + // CHECK: "gpux.destroy_stream"(%[[STREAM]]) : (!gpux.StreamType) -> () return } diff --git a/test/Conversion/GPUToGPUX/gpux-memcpy.mlir b/test/Conversion/GPUToGPUX/gpux-memcpy.mlir index e3fcb4853..00b126ab9 100644 --- a/test/Conversion/GPUToGPUX/gpux-memcpy.mlir +++ b/test/Conversion/GPUToGPUX/gpux-memcpy.mlir @@ -4,7 +4,7 @@ func.func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) { // CHECK-LABEL: func @memcpy // CHECK: %[[STREAM:.*]] = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: "gpux.memcpy"(%[[STREAM:.*]], {{.*}}, {{.*}}) : (!gpux.StreamType, memref<3x7xf32>, memref<3x7xf32, 1>) -> () + // CHECK: "gpux.memcpy"(%[[STREAM]], {{.*}}, {{.*}}) : (!gpux.StreamType, memref<3x7xf32>, memref<3x7xf32, 1>) -> () gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1> // CHECK: "gpux.destroy_stream"(%0) : (!gpux.StreamType) -> () return diff --git a/test/Conversion/GPUToGPUX/gpux-memset.mlir b/test/Conversion/GPUToGPUX/gpux-memset.mlir index f9922e43d..4c6afdc75 100644 --- a/test/Conversion/GPUToGPUX/gpux-memset.mlir +++ b/test/Conversion/GPUToGPUX/gpux-memset.mlir @@ -4,7 +4,7 @@ func.func @memset(%dst : memref<3x7xf32>, %value : f32) { // CHECK-LABEL: func @memset // CHECK: %[[STREAM:.*]] = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: "gpux.memset"(%[[STREAM:.*]], {{.*}}, {{.*}}) : (!gpux.StreamType, memref<3x7xf32>, f32) -> () + // CHECK: "gpux.memset"(%[[STREAM]], {{.*}}, {{.*}}) : (!gpux.StreamType, memref<3x7xf32>, f32) -> () gpu.memset %dst, %value : memref<3x7xf32>, f32 // CHECK: "gpux.destroy_stream"(%0) : (!gpux.StreamType) -> () return diff --git a/test/Conversion/GPUXToLLVM/alloc-dealloc-to-gpu-runtime.mlir b/test/Conversion/GPUXToLLVM/alloc-dealloc-to-gpu-runtime.mlir index a0816f6cc..9dc4ceffc 100644 --- a/test/Conversion/GPUXToLLVM/alloc-dealloc-to-gpu-runtime.mlir +++ b/test/Conversion/GPUXToLLVM/alloc-dealloc-to-gpu-runtime.mlir @@ -5,11 +5,11 @@ module attributes {gpu.container_module}{ func.func @main() attributes {llvm.emit_c_interface} { // CHECK: %[[DEVICE:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[CONTEXT:.*]] = llvm.mlir.zero : !llvm.ptr - // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE:.*]], %[[CONTEXT:.*]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr + // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE]], %[[CONTEXT]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr %0 = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: llvm.call @gpuMemAlloc(%[[stream:.*]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i32) -> !llvm.ptr + // CHECK: llvm.call @gpuMemAlloc(%[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i32) -> !llvm.ptr %memref = "gpux.alloc"(%0) {operandSegmentSizes = array} : (!gpux.StreamType) -> memref<8xf32> - // CHECK: llvm.call @gpuMemFree(%[[stream:.*]], %{{.*}}) : (!llvm.ptr, !llvm.ptr) -> () + // CHECK: llvm.call @gpuMemFree(%[[STREAM]], %{{.*}}) : (!llvm.ptr, !llvm.ptr) -> () "gpux.dealloc"(%0, %memref) : (!gpux.StreamType, memref<8xf32>) -> () "gpux.destroy_stream"(%0) : (!gpux.StreamType) -> () return diff --git a/test/Conversion/GPUXToLLVM/create-destroy-stream.mlir b/test/Conversion/GPUXToLLVM/create-destroy-stream.mlir index eb09f2ef1..f8f9aa9aa 100644 --- a/test/Conversion/GPUXToLLVM/create-destroy-stream.mlir +++ b/test/Conversion/GPUXToLLVM/create-destroy-stream.mlir @@ -5,9 +5,9 @@ module attributes {gpu.container_module}{ func.func @main() attributes {llvm.emit_c_interface} { // CHECK: %[[DEVICE:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[CONTEXT:.*]] = llvm.mlir.zero : !llvm.ptr - // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE:.*]], %[[CONTEXT:.*]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr + // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE]], %[[CONTEXT]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr %0 = "gpux.create_stream"() : () -> !gpux.StreamType - // CHECK: llvm.call @gpuStreamDestroy(%[[STREAM:.*]]) : (!llvm.ptr) -> () + // CHECK: llvm.call @gpuStreamDestroy(%[[STREAM]]) : (!llvm.ptr) -> () "gpux.destroy_stream"(%0) : (!gpux.StreamType) -> () return } diff --git a/test/Conversion/GPUXToLLVM/launch-func-to-gpu-runtime.mlir b/test/Conversion/GPUXToLLVM/launch-func-to-gpu-runtime.mlir index f582a169d..a737ad78a 100644 --- a/test/Conversion/GPUXToLLVM/launch-func-to-gpu-runtime.mlir +++ b/test/Conversion/GPUXToLLVM/launch-func-to-gpu-runtime.mlir @@ -8,17 +8,17 @@ module attributes {gpu.container_module, spirv.target_env = #spirv.target_env<#s %c8 = arith.constant 8 : index // CHECK: %[[DEVICE:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[CONTEXT:.*]] = llvm.mlir.zero : !llvm.ptr - // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE:.*]], %[[CONTEXT:.*]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr + // CHECK: %[[STREAM:.*]] = llvm.call @gpuCreateStream(%[[DEVICE]], %[[CONTEXT]]) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr %0 = "gpux.create_stream"() : () -> !gpux.StreamType %memref = "gpux.alloc"(%0) {operandSegmentSizes = array} : (!gpux.StreamType) -> memref<8xf32> %memref_0 = "gpux.alloc"(%0) {operandSegmentSizes = array} : (!gpux.StreamType) -> memref<8xf32> %memref_1 = "gpux.alloc"(%0) {operandSegmentSizes = array} : (!gpux.StreamType) -> memref<8xf32> // CHECK: llvm.mlir.addressof @Kernels_spirv_binary : !llvm.ptr> - // CHECK: %[[MODULE:.*]] = llvm.call @gpuModuleLoad(%[[STREAM:.*]], %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64) -> !llvm.ptr + // CHECK: %[[MODULE:.*]] = llvm.call @gpuModuleLoad(%[[STREAM]], %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64) -> !llvm.ptr // CHECK: llvm.mlir.addressof @Kernels_kernel_1_kernel_name : !llvm.ptr> - // CHECK: %[[KERNEL:.*]] = llvm.call @gpuKernelGet(%[[STREAM:.*]], %[[MODULE:.*]], %{{.*}}) : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr - // CHECK: llvm.call @gpuLaunchKernel(%[[STREAM:.*]], %[[KERNEL:.*]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, i64)>>) -> () + // CHECK: %[[KERNEL:.*]] = llvm.call @gpuKernelGet(%[[STREAM]], %[[MODULE]], %{{.*}}) : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr + // CHECK: llvm.call @gpuLaunchKernel(%[[STREAM]], %[[KERNEL]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, i64)>>) -> () "gpux.launch_func"(%0, %c8, %c1, %c1, %c1, %c1, %c1, %memref, %memref_0, %memref_1) {kernel = @Kernels::@kernel_1, operandSegmentSizes = array} : (!gpux.StreamType, index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> () "gpux.dealloc"(%0, %memref) : (!gpux.StreamType, memref<8xf32>) -> () "gpux.dealloc"(%0, %memref_0) : (!gpux.StreamType, memref<8xf32>) -> () diff --git a/test/Transforms/BF16ToGPU/BroadcastNonNumpy.bf16.mlir b/test/Transforms/BF16ToGPU/BroadcastNonNumpy.bf16.mlir index d5882add5..5bed68629 100644 --- a/test/Transforms/BF16ToGPU/BroadcastNonNumpy.bf16.mlir +++ b/test/Transforms/BF16ToGPU/BroadcastNonNumpy.bf16.mlir @@ -10,26 +10,26 @@ module @broadcast_non_numpy attributes {gpu.container_module} { %c1 = arith.constant 1 : index // CHECK: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<6xi8> // CHECK: %[[VIEW:.*]] = memref.view %memref[%[[CONST0:.*]]][] : memref<6xi8> to memref<3xbf16> - // CHECK: %[[VIEW0:.*]] = memref.view %memref[%[[CONST0:.*]]][] : memref<6xi8> to memref<3xi16> - // CHECK: memref.copy %arg0, %[[VIEW:.*]] : memref<3xbf16> to memref<3xbf16> + // CHECK: %[[VIEW0:.*]] = memref.view %memref[%[[CONST0]]][] : memref<6xi8> to memref<3xi16> + // CHECK: memref.copy %arg0, %[[VIEW]] : memref<3xbf16> to memref<3xbf16> %memref = gpu.alloc host_shared () : memref<3xbf16> memref.copy %arg0, %memref : memref<3xbf16> to memref<3xbf16> // CHECK: %[[MEMREF1:.*]] = gpu.alloc () : memref<24xi8> - // CHECK: %[[VIEW2:.*]] = memref.view %[[MEMREF1:.*]][%[[CONST0:.*]]][] : memref<24xi8> to memref<3x4xi16> + // CHECK: %[[VIEW2:.*]] = memref.view %[[MEMREF1]][%[[CONST0]]][] : memref<24xi8> to memref<3x4xi16> %memref_0 = gpu.alloc () : memref<3x4xbf16> - // CHECK: gpu.launch_func @test_kernel::@test_kernel blocks in (%[[CONST3:.*]], %[[CONST4:.*]], %[[CONST1:.*]]) threads in (%[[CONST1:.*]], %[[CONST1:.*]], %[[CONST1:.*]]) args(%[[CONST0_I16:.*]] : i16, %[[VIEW2:.*]] : memref<3x4xi16>) + // CHECK: gpu.launch_func @test_kernel::@test_kernel blocks in (%[[CONST3:.*]], %[[CONST4:.*]], %[[CONST1:.*]]) threads in (%[[CONST1]], %[[CONST1]], %[[CONST1]]) args(%[[CONST0_I16:.*]] : i16, %[[VIEW2]] : memref<3x4xi16>) gpu.launch_func @test_kernel::@test_kernel blocks in (%c3, %c4, %c1) threads in (%c1, %c1, %c1) args(%cst : bf16, %memref_0 : memref<3x4xbf16>) // CHECK: %[[MEMREF3:.*]] = gpu.alloc host_shared () : memref<24xi8> - // CHECK: %[[VIEW4:.*]] = memref.view %[[MEMREF3:.*]][%[[CONST0:.*]]][] : memref<24xi8> to memref<3x4xbf16> - // CHECK: %[[VIEW5:.*]] = memref.view %[[MEMREF3:.*]][%[[CONST0:.*]]][] : memref<24xi8> to memref<3x4xi16> + // CHECK: %[[VIEW4:.*]] = memref.view %[[MEMREF3]][%[[CONST0]]][] : memref<24xi8> to memref<3x4xbf16> + // CHECK: %[[VIEW5:.*]] = memref.view %[[MEMREF3]][%[[CONST0]]][] : memref<24xi8> to memref<3x4xi16> %memref_1 = gpu.alloc host_shared () : memref<3x4xbf16> - // CHECK: gpu.launch_func @test_kernel_0::@test_kernel blocks in (%[[CONST3:.*]], %[[CONST4:.*]], %[[CONST1:.*]]) threads in (%[[CONST1:.*]], %[[CONST1:.*]], %[[CONST1:.*]]) args(%[[VIEW0:.*]] : memref<3xi16>, %[[VIEW5:.*]] : memref<3x4xi16>) + // CHECK: gpu.launch_func @test_kernel_0::@test_kernel blocks in (%[[CONST3]], %[[CONST4]], %[[CONST1]]) threads in (%[[CONST1]], %[[CONST1]], %[[CONST1]]) args(%[[VIEW0]] : memref<3xi16>, %[[VIEW5]] : memref<3x4xi16>) gpu.launch_func @test_kernel_0::@test_kernel blocks in (%c3, %c4, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<3xbf16>, %memref_1 : memref<3x4xbf16>) - // CHECK: gpu.dealloc %[[MEMREF1:.*]] : memref<24xi8> + // CHECK: gpu.dealloc %[[MEMREF1]] : memref<24xi8> gpu.dealloc %memref_0 : memref<3x4xbf16> - // CHECK: gpu.dealloc %[[MEMREF:.*]] : memref<6xi8> + // CHECK: gpu.dealloc %[[MEMREF]] : memref<6xi8> gpu.dealloc %memref : memref<3xbf16> - // CHECK: return %[[VIEW4:.*]] : memref<3x4xbf16> + // CHECK: return %[[VIEW4]] : memref<3x4xbf16> return %memref_1 : memref<3x4xbf16> } gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { @@ -48,7 +48,7 @@ module @broadcast_non_numpy attributes {gpu.container_module} { %0 = gpu.block_id x %1 = gpu.block_id y // CHECK: %[[VAR2_2:.*]] = memref.load %arg0[%[[VAR0_2:.*]]] : memref<3xi16> - // CHECK: memref.store %[[VAR2_2:.*]], %arg1[%[[VAR0_2:.*]], %[[VAR1_2:.*]]] : memref<3x4xi16> + // CHECK: memref.store %[[VAR2_2]], %arg1[%[[VAR0_2]], %[[VAR1_2:.*]]] : memref<3x4xi16> %2 = memref.load %arg0[%0] : memref<3xbf16> memref.store %2, %arg1[%0, %1] : memref<3x4xbf16> gpu.return diff --git a/test/Transforms/BF16ToGPU/EltwiseAdd.bf16.mlir b/test/Transforms/BF16ToGPU/EltwiseAdd.bf16.mlir index 89113ee1f..2a81c56aa 100644 --- a/test/Transforms/BF16ToGPU/EltwiseAdd.bf16.mlir +++ b/test/Transforms/BF16ToGPU/EltwiseAdd.bf16.mlir @@ -9,26 +9,26 @@ module @eltwise_add attributes {gpu.container_module} { %c10 = arith.constant 10 : index %c1 = arith.constant 1 : index // CHECK: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<400xi8> - // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xbf16> - // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xi16> - // CHECK: memref.copy %arg1, %[[VIEW:.*]] : memref<10x20xbf16> to memref<10x20xbf16> + // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xbf16> + // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF]][%[[CONST0]]][] : memref<400xi8> to memref<10x20xi16> + // CHECK: memref.copy %arg1, %[[VIEW]] : memref<10x20xbf16> to memref<10x20xbf16> %memref = gpu.alloc host_shared () : memref<10x20xbf16> memref.copy %arg1, %memref : memref<10x20xbf16> to memref<10x20xbf16> // CHECK: %[[MEMREF_1:.*]] = gpu.alloc host_shared () : memref<400xi8> - // CHECK: %[[VIEW_2:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xbf16> - // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xi16> - // CHECK: memref.copy %arg0, %[[VIEW_2:.*]] : memref<10x20xbf16> to memref<10x20xbf16> + // CHECK: %[[VIEW_2:.*]] = memref.view %[[MEMREF_1]][%[[CONST0]]][] : memref<400xi8> to memref<10x20xbf16> + // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_1]][%[[CONST0]]][] : memref<400xi8> to memref<10x20xi16> + // CHECK: memref.copy %arg0, %[[VIEW_2]] : memref<10x20xbf16> to memref<10x20xbf16> %memref_0 = gpu.alloc host_shared () : memref<10x20xbf16> memref.copy %arg0, %memref_0 : memref<10x20xbf16> to memref<10x20xbf16> // CHECK: %[[MEMREF_4:.*]] = gpu.alloc host_shared () : memref<400xi8> - // CHECK: %[[VIEW_5:.*]] = memref.view %[[MEMREF_4:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xbf16> - // CHECK: %[[VIEW_6:.*]] = memref.view %[[MEMREF_4:.*]][%[[CONST0:.*]]][] : memref<400xi8> to memref<10x20xi16> + // CHECK: %[[VIEW_5:.*]] = memref.view %[[MEMREF_4]][%[[CONST0]]][] : memref<400xi8> to memref<10x20xbf16> + // CHECK: %[[VIEW_6:.*]] = memref.view %[[MEMREF_4]][%[[CONST0]]][] : memref<400xi8> to memref<10x20xi16> %memref_1 = gpu.alloc host_shared () : memref<10x20xbf16> - // CHECK: args(%[[VIEW_3:.*]] : memref<10x20xi16>, %[[VIEW_0:.*]] : memref<10x20xi16>, %[[VIEW_6:.*]] : memref<10x20xi16>) + // CHECK: args(%[[VIEW_3]] : memref<10x20xi16>, %[[VIEW_0]] : memref<10x20xi16>, %[[VIEW_6]] : memref<10x20xi16>) gpu.launch_func @test_kernel::@test_kernel blocks in (%c10, %c20, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<10x20xbf16>, %memref : memref<10x20xbf16>, %memref_1 : memref<10x20xbf16>) - // CHECK: gpu.dealloc %[[MEMREF_1:.*]] : memref<400xi8> - // CHECK: gpu.dealloc %[[MEMREF:.*]] : memref<400xi8> - // CHECK: return %[[VIEW_5:.*]] : memref<10x20xbf16> + // CHECK: gpu.dealloc %[[MEMREF_1]] : memref<400xi8> + // CHECK: gpu.dealloc %[[MEMREF]] : memref<400xi8> + // CHECK: return %[[VIEW_5]] : memref<10x20xbf16> gpu.dealloc %memref_0 : memref<10x20xbf16> gpu.dealloc %memref : memref<10x20xbf16> return %memref_1 : memref<10x20xbf16> @@ -40,17 +40,17 @@ module @eltwise_add attributes {gpu.container_module} { %1 = gpu.block_id y // CHECK: %[[VAR2:.*]] = memref.load %arg0[%[[VAR0:.*]], %[[VAR1:.*]]] : memref<10x20xi16> %2 = memref.load %arg0[%0, %1] : memref<10x20xbf16> - // CHECK: %[[VAR3:.*]] = memref.load %arg1[%[[VAR0:.*]], %[[VAR1:.*]]] : memref<10x20xi16> + // CHECK: %[[VAR3:.*]] = memref.load %arg1[%[[VAR0]], %[[VAR1]]] : memref<10x20xi16> %3 = memref.load %arg1[%0, %1] : memref<10x20xbf16> - // CHECK: %[[VAR4:.*]] = arith.bitcast %[[VAR2:.*]] : i16 to bf16 - // CHECK: %[[VAR5:.*]] = arith.extf %[[VAR4:.*]] : bf16 to f32 - // CHECK: %[[VAR6:.*]] = arith.bitcast %[[VAR3:.*]] : i16 to bf16 - // CHECK: %[[VAR7:.*]] = arith.extf %[[VAR6:.*]] : bf16 to f32 - // CHECK: %[[VAR8:.*]] = arith.addf %[[VAR5:.*]], %[[VAR7:.*]] : f32 - // CHECK: %[[VAR9:.*]] = arith.truncf %[[VAR8:.*]] : f32 to bf16 - // CHECK: %[[VAR10:.*]] = arith.bitcast %[[VAR9:.*]] : bf16 to i16 + // CHECK: %[[VAR4:.*]] = arith.bitcast %[[VAR2]] : i16 to bf16 + // CHECK: %[[VAR5:.*]] = arith.extf %[[VAR4]] : bf16 to f32 + // CHECK: %[[VAR6:.*]] = arith.bitcast %[[VAR3]] : i16 to bf16 + // CHECK: %[[VAR7:.*]] = arith.extf %[[VAR6]] : bf16 to f32 + // CHECK: %[[VAR8:.*]] = arith.addf %[[VAR5]], %[[VAR7]] : f32 + // CHECK: %[[VAR9:.*]] = arith.truncf %[[VAR8]] : f32 to bf16 + // CHECK: %[[VAR10:.*]] = arith.bitcast %[[VAR9]] : bf16 to i16 %4 = arith.addf %2, %3 : bf16 - // CHECK: memref.store %[[VAR10:.*]], %arg2[%[[VAR0:.*]], %[[VAR1:.*]]] : memref<10x20xi16> + // CHECK: memref.store %[[VAR10]], %arg2[%[[VAR0]], %[[VAR1]]] : memref<10x20xi16> memref.store %4, %arg2[%0, %1] : memref<10x20xbf16> gpu.return } diff --git a/test/Transforms/BF16ToGPU/GEMM.bf16.mlir b/test/Transforms/BF16ToGPU/GEMM.bf16.mlir index 2c9ed51b3..29904df2f 100644 --- a/test/Transforms/BF16ToGPU/GEMM.bf16.mlir +++ b/test/Transforms/BF16ToGPU/GEMM.bf16.mlir @@ -20,28 +20,28 @@ module @gemm attributes {gpu.container_module} { %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index // CHECK: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<18xi8> - // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xbf16> - // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xi16> - // CHECK: memref.copy %arg1, %[[VIEW:.*]] : memref<3x3xbf16> to memref<3x3xbf16> + // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xbf16> + // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF]][%[[CONST0]]][] : memref<18xi8> to memref<3x3xi16> + // CHECK: memref.copy %arg1, %[[VIEW]] : memref<3x3xbf16> to memref<3x3xbf16> %memref = gpu.alloc host_shared () : memref<3x3xbf16> memref.copy %arg1, %memref : memref<3x3xbf16> to memref<3x3xbf16> // CHECK: %[[MEMREF_1:.*]] = gpu.alloc host_shared () : memref<18xi8> - // CHECK: %[[VIEW_2:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xbf16> - // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xi16> - // CHECK: memref.copy %arg0, %[[VIEW_2:.*]] : memref<3x3xbf16> to memref<3x3xbf16> + // CHECK: %[[VIEW_2:.*]] = memref.view %[[MEMREF_1]][%[[CONST0]]][] : memref<18xi8> to memref<3x3xbf16> + // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_1]][%[[CONST0]]][] : memref<18xi8> to memref<3x3xi16> + // CHECK: memref.copy %arg0, %[[VIEW_2]] : memref<3x3xbf16> to memref<3x3xbf16> %memref_0 = gpu.alloc host_shared () : memref<3x3xbf16> memref.copy %arg0, %memref_0 : memref<3x3xbf16> to memref<3x3xbf16> // CHECK: %[[MEMREF_4:.*]] = gpu.alloc host_shared () : memref<18xi8> - // CHECK: %[[VIEW_5:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xbf16> - // CHECK: %[[VIEW_6:.*]] = memref.view %[[MEMREF_1:.*]][%[[CONST0:.*]]][] : memref<18xi8> to memref<3x3xi16> - // CHECK: memref.copy %arg2, %[[VIEW_5:.*]] : memref<3x3xbf16> to memref<3x3xbf16> + // CHECK: %[[VIEW_5:.*]] = memref.view %[[MEMREF_4]][%[[CONST0]]][] : memref<18xi8> to memref<3x3xbf16> + // CHECK: %[[VIEW_6:.*]] = memref.view %[[MEMREF_4]][%[[CONST0]]][] : memref<18xi8> to memref<3x3xi16> + // CHECK: memref.copy %arg2, %[[VIEW_5]] : memref<3x3xbf16> to memref<3x3xbf16> %memref_1 = gpu.alloc host_shared () : memref<3x3xbf16> memref.copy %arg2, %memref_1 : memref<3x3xbf16> to memref<3x3xbf16> - // CHECK: args(%[[VIEW_3:.*]] : memref<3x3xi16>, %[[VIEW_0:.*]] : memref<3x3xi16>, %[[VIEW_6:.*]] : memref<3x3xi16> + // CHECK: args(%[[VIEW_3]] : memref<3x3xi16>, %[[VIEW_0]] : memref<3x3xi16>, %[[VIEW_6]] : memref<3x3xi16> gpu.launch_func @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xbf16>, %memref : memref<3x3xbf16>, %memref_1 : memref<3x3xbf16>, %c0 : index, %c3 : index, %c1 : index) - // CHECK: gpu.dealloc %[[MEMREF_1:.*]] : memref<18xi8> - // CHECK: gpu.dealloc %[[MEMREF:.*]] : memref<18xi8> - // CHECK: return %[[VIEW_5:.*]] : memref<3x3xbf16> + // CHECK: gpu.dealloc %[[MEMREF_1]] : memref<18xi8> + // CHECK: gpu.dealloc %[[MEMREF]] : memref<18xi8> + // CHECK: return %[[VIEW_5]] : memref<3x3xbf16> gpu.dealloc %memref_0 : memref<3x3xbf16> gpu.dealloc %memref : memref<3x3xbf16> return %memref_1 : memref<3x3xbf16> @@ -54,25 +54,25 @@ module @gemm attributes {gpu.container_module} { scf.for %arg6 = %arg3 to %arg4 step %arg5 { // CHECK: %[[VAR2:.*]] = memref.load %arg0[%[[VAR0:.*]], %arg6] : memref<3x3xi16> // CHECK: %[[VAR3:.*]] = memref.load %arg1[%arg6, %[[VAR1:.*]]] : memref<3x3xi16> - // CHECK: %[[VAR4:.*]] = memref.load %arg2[%[[VAR0:.*]], %[[VAR1:.*]]] : memref<3x3xi16> + // CHECK: %[[VAR4:.*]] = memref.load %arg2[%[[VAR0]], %[[VAR1]]] : memref<3x3xi16> %2 = memref.load %arg0[%0, %arg6] : memref<3x3xbf16> %3 = memref.load %arg1[%arg6, %1] : memref<3x3xbf16> %4 = memref.load %arg2[%0, %1] : memref<3x3xbf16> - // CHECK: %[[VAR5:.*]] = arith.bitcast %[[VAR2:.*]] : i16 to bf16 - // CHECK: %[[VAR6:.*]] = arith.extf %[[VAR5:.*]] : bf16 to f32 - // CHECK: %[[VAR7:.*]] = arith.bitcast %[[VAR3:.*]] : i16 to bf16 - // CHECK: %[[VAR8:.*]] = arith.extf %[[VAR7:.*]] : bf16 to f32 - // CHECK: %[[VAR9:.*]] = arith.mulf %[[VAR6:.*]], %[[VAR8:.*]] : f32 - // CHECK: %[[VAR10:.*]] = arith.truncf %[[VAR9:.*]] : f32 to bf16 + // CHECK: %[[VAR5:.*]] = arith.bitcast %[[VAR2]] : i16 to bf16 + // CHECK: %[[VAR6:.*]] = arith.extf %[[VAR5]] : bf16 to f32 + // CHECK: %[[VAR7:.*]] = arith.bitcast %[[VAR3]] : i16 to bf16 + // CHECK: %[[VAR8:.*]] = arith.extf %[[VAR7]] : bf16 to f32 + // CHECK: %[[VAR9:.*]] = arith.mulf %[[VAR6]], %[[VAR8]] : f32 + // CHECK: %[[VAR10:.*]] = arith.truncf %[[VAR9]] : f32 to bf16 %5 = arith.mulf %2, %3 : bf16 - // CHECK: %[[VAR11:.*]] = arith.bitcast %[[VAR4:.*]] : i16 to bf16 - // CHECK: %[[VAR12:.*]] = arith.extf %[[VAR11:.*]] : bf16 to f32 - // CHECK: %[[VAR13:.*]] = arith.extf %[[VAR10:.*]] : bf16 to f32 - // CHECK: %[[VAR14:.*]] = arith.addf %[[VAR12:.*]], %[[VAR13:.*]] : f32 - // CHECK: %[[VAR15:.*]] = arith.truncf %[[VAR14:.*]] : f32 to bf16 - // CHECK: %[[VAR16:.*]] = arith.bitcast %[[VAR15:.*]] : bf16 to i16 + // CHECK: %[[VAR11:.*]] = arith.bitcast %[[VAR4]] : i16 to bf16 + // CHECK: %[[VAR12:.*]] = arith.extf %[[VAR11]] : bf16 to f32 + // CHECK: %[[VAR13:.*]] = arith.extf %[[VAR10]] : bf16 to f32 + // CHECK: %[[VAR14:.*]] = arith.addf %[[VAR12]], %[[VAR13]] : f32 + // CHECK: %[[VAR15:.*]] = arith.truncf %[[VAR14]] : f32 to bf16 + // CHECK: %[[VAR16:.*]] = arith.bitcast %[[VAR15]] : bf16 to i16 %6 = arith.addf %4, %5 : bf16 - // CHECK: memref.store %[[VAR16:.*]], %arg2[%[[VAR0:.*]], %[[VAR1:.*]]] : memref<3x3xi16> + // CHECK: memref.store %[[VAR16]], %arg2[%[[VAR0]], %[[VAR1]]] : memref<3x3xi16> memref.store %6, %arg2[%0, %1] : memref<3x3xbf16> } gpu.return diff --git a/test/Transforms/BF16ToGPU/Relu.bf16.mlir b/test/Transforms/BF16ToGPU/Relu.bf16.mlir index 878ad669e..01b90d0f2 100644 --- a/test/Transforms/BF16ToGPU/Relu.bf16.mlir +++ b/test/Transforms/BF16ToGPU/Relu.bf16.mlir @@ -17,23 +17,23 @@ module @relu attributes {gpu.container_module} { %cst = arith.constant 0.000000e+00 : bf16 %c1 = arith.constant 1 : index // CHECK: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<40xi8> - // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<40xi8> to memref<4x5xbf16> - // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF:.*]][%[[CONST0:.*]]][] : memref<40xi8> to memref<4x5xi16> - // CHECK: memref.copy %arg0, %[[VIEW:.*]] : memref<4x5xbf16> to memref<4x5xbf16> + // CHECK: %[[VIEW:.*]] = memref.view %[[MEMREF]][%[[CONST0:.*]]][] : memref<40xi8> to memref<4x5xbf16> + // CHECK: %[[VIEW_0:.*]] = memref.view %[[MEMREF]][%[[CONST0]]][] : memref<40xi8> to memref<4x5xi16> + // CHECK: memref.copy %arg0, %[[VIEW]] : memref<4x5xbf16> to memref<4x5xbf16> %memref = gpu.alloc host_shared () : memref<4x5xbf16> memref.copy %arg0, %memref : memref<4x5xbf16> to memref<4x5xbf16> %memref_0 = gpu.alloc () : memref<4x5xi1> - // CHECK: args(%[[VIEW_0:.*]] : memref<4x5xi16>, %[[CONST0_I16:.*]] : i16 + // CHECK: args(%[[VIEW_0]] : memref<4x5xi16>, %[[CONST0_I16:.*]] : i16 gpu.launch_func @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xbf16>, %cst : bf16, %memref_0 : memref<4x5xi1>) // CHECK: %[[MEMREF_2:.*]] = gpu.alloc host_shared () : memref<40xi8> - // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_2:.*]][%c0][] : memref<40xi8> to memref<4x5xbf16> - // CHECK: %[[VIEW_4:.*]] = memref.view %[[MEMREF_2:.*]][%c0][] : memref<40xi8> to memref<4x5xi16> + // CHECK: %[[VIEW_3:.*]] = memref.view %[[MEMREF_2]][%c0][] : memref<40xi8> to memref<4x5xbf16> + // CHECK: %[[VIEW_4:.*]] = memref.view %[[MEMREF_2]][%c0][] : memref<40xi8> to memref<4x5xi16> %memref_1 = gpu.alloc host_shared () : memref<4x5xbf16> - // CHECK: args(%[[MEMREF_1:.*]] : memref<4x5xi1>, %[[VIEW_0:.*]] : memref<4x5xi16>, %[[CONST0_I16:.*]] : i16, %[[VIEW_4:.*]] : memref<4x5xi16>) + // CHECK: args(%[[MEMREF_1:.*]] : memref<4x5xi1>, %[[VIEW_0]] : memref<4x5xi16>, %[[CONST0_I16]] : i16, %[[VIEW_4]] : memref<4x5xi16>) gpu.launch_func @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xbf16>, %cst : bf16, %memref_1 : memref<4x5xbf16>) gpu.dealloc %memref_0 : memref<4x5xi1> - // CHECK: gpu.dealloc %[[MEMREF:.*]] : memref<40xi8> - // CHECK: return %[[VIEW_3:.*]] : memref<4x5xbf16> + // CHECK: gpu.dealloc %[[MEMREF]] : memref<40xi8> + // CHECK: return %[[VIEW_3]] : memref<4x5xbf16> gpu.dealloc %memref : memref<4x5xbf16> return %memref_1 : memref<4x5xbf16> } @@ -44,11 +44,11 @@ module @relu attributes {gpu.container_module} { %1 = gpu.block_id y // CHECK: %[[VAR2_1:.*]] = memref.load %arg0[%[[VAR0_1:.*]], %[[VAR1_1:.*]]] : memref<4x5xi16> %2 = memref.load %arg0[%0, %1] : memref<4x5xbf16> - // CHECK: %[[VAR3_1:.*]] = arith.bitcast %[[VAR2_1:.*]] : i16 to bf16 - // CHECK: %[[VAR4_1:.*]] = arith.extf %[[VAR3_1:.*]] : bf16 to f32 + // CHECK: %[[VAR3_1:.*]] = arith.bitcast %[[VAR2_1]] : i16 to bf16 + // CHECK: %[[VAR4_1:.*]] = arith.extf %[[VAR3_1]] : bf16 to f32 // CHECK: %[[VAR5_1:.*]] = arith.bitcast %arg1 : i16 to bf16 - // CHECK: %[[VAR6_1:.*]] = arith.extf %[[VAR5_1:.*]] : bf16 to f32 - // CHECK: %[[VAR7_1:.*]] = arith.cmpf olt, %[[VAR4_1:.*]], %[[VAR6_1:.*]] : f32 + // CHECK: %[[VAR6_1:.*]] = arith.extf %[[VAR5_1]] : bf16 to f32 + // CHECK: %[[VAR7_1:.*]] = arith.cmpf olt, %[[VAR4_1]], %[[VAR6_1]] : f32 %3 = arith.cmpf olt, %2, %arg1 : bf16 memref.store %3, %arg2[%0, %1] : memref<4x5xi1> gpu.return @@ -63,14 +63,14 @@ module @relu attributes {gpu.container_module} { // CHECK: %[[VAR3_2:.*]] = memref.load %arg1[%[[VAR0_2:.*]], %[[VAR1_2:.*]]] : memref<4x5xi16> %3 = memref.load %arg1[%0, %1] : memref<4x5xbf16> // CHECK: %[[VAR4_2:.*]] = arith.bitcast %arg2 : i16 to bf16 - // CHECK: %[[VAR5_2:.*]] = arith.extf %[[VAR4_2:.*]] : bf16 to f32 - // CHECK: %[[VAR6_2:.*]] = arith.bitcast %[[VAR3_2:.*]] : i16 to bf16 - // CHECK: %[[VAR7_2:.*]] = arith.extf %[[VAR6_2:.*]] : bf16 to f32 - // CHECK: %[[VAR8_2:.*]] = arith.select %[[VAR2_2:.*]], %[[VAR5_2:.*]], %[[VAR7_2:.*]] : f32 - // CHECK: %[[VAR9_2:.*]] = arith.truncf %[[VAR8_2:.*]] : f32 to bf16 - // CHECK: %[[VAR10_2:.*]] = arith.bitcast %[[VAR9_2:.*]] : bf16 to i16 + // CHECK: %[[VAR5_2:.*]] = arith.extf %[[VAR4_2]] : bf16 to f32 + // CHECK: %[[VAR6_2:.*]] = arith.bitcast %[[VAR3_2]] : i16 to bf16 + // CHECK: %[[VAR7_2:.*]] = arith.extf %[[VAR6_2]] : bf16 to f32 + // CHECK: %[[VAR8_2:.*]] = arith.select %[[VAR2_2:.*]], %[[VAR5_2]], %[[VAR7_2]] : f32 + // CHECK: %[[VAR9_2:.*]] = arith.truncf %[[VAR8_2]] : f32 to bf16 + // CHECK: %[[VAR10_2:.*]] = arith.bitcast %[[VAR9_2]] : bf16 to i16 %4 = arith.select %2, %arg2, %3 : bf16 - // CHECK: memref.store %[[VAR10_2:.*]], %arg3[%[[VAR0_2:.*]], %[[VAR1_2:.*]]] : memref<4x5xi16> + // CHECK: memref.store %[[VAR10_2]], %arg3[%[[VAR0_2]], %[[VAR1_2]]] : memref<4x5xi16> memref.store %4, %arg3[%0, %1] : memref<4x5xbf16> gpu.return } diff --git a/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir b/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir index 80a9da054..f7beea259 100644 --- a/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir +++ b/test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir @@ -6,14 +6,14 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3 %c2 = arith.constant 2 : index %c1 = arith.constant 1 : index %c5 = arith.constant 5 : index - // OPENCL: %[[MEMREF0:.*]]= gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy %arg1, %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32> - // OPENCL: %[[MEMREF1:.*]]= gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy %arg0, %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32> - // VULKAN: %[[MEMREF0:.*]]= memref.alloc() : memref<2x5xf32> - // VULKAN: memref.copy %arg1, %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32> - // VULKAN: %[[MEMREF1:.*]]= memref.alloc() : memref<2x5xf32> - // VULKAN: memref.copy %arg0, %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32> + // OPENCL: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32> + // OPENCL: memref.copy %arg0, %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> + // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32> + // VULKAN: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32> + // VULKAN: memref.copy %arg0, %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> %0 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32> // OPENCL: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32> diff --git a/test/Transforms/InsertGpuAllocs/call-op.mlir b/test/Transforms/InsertGpuAllocs/call-op.mlir index b3f6255e9..aa6a5dc78 100644 --- a/test/Transforms/InsertGpuAllocs/call-op.mlir +++ b/test/Transforms/InsertGpuAllocs/call-op.mlir @@ -8,24 +8,24 @@ func.func @main() { %0 = memref.alloc() : memref<8xf32> %1 = memref.alloc() : memref<8xf32> %2 = memref.alloc() : memref<8xf32> - // OPENCL: %[[MEMREF0:.*]]= gpu.alloc () : memref<8xf32> - // OPENCL: %[[MEMREF1:.*]]= gpu.alloc () : memref<8xf32> - // OPENCL: %[[MEMREF2:.*]]= gpu.alloc () : memref<8xf32> - // VULKAN: %[[MEMREF0:.*]]= memref.alloc() : memref<8xf32> - // VULKAN: %[[MEMREF1:.*]]= memref.alloc() : memref<8xf32> + // OPENCL: %[[MEMREF0:.*]] = gpu.alloc () : memref<8xf32> + // OPENCL: %[[MEMREF1:.*]] = gpu.alloc () : memref<8xf32> + // OPENCL: %[[MEMREF2:.*]] = gpu.alloc () : memref<8xf32> + // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<8xf32> + // VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<8xf32> gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c8, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) { // OPENCL: gpu.launch {{.*}} // VULKAN: gpu.launch {{.*}} %7 = gpu.block_id x - // OPENCL: [[VAR0:.*]] = gpu.block_id x - // VULKAN: [[VAR0:.*]] = gpu.block_id x + // OPENCL: [[VAR0:%.*]] = gpu.block_id x + // VULKAN: [[VAR0:%.*]] = gpu.block_id x %8 = memref.load %0[%7] : memref<8xf32> %9 = memref.load %1[%7] : memref<8xf32> - // OPENCL: [[VAR1:.*]] = memref.load %[[MEMREF0:.*]][[[VAR0:.*]]] : memref<8xf32> - // OPENCL: [[VAR2:.*]] = memref.load %[[MEMREF1:.*]][[[VAR0:.*]]] : memref<8xf32> - // VULKAN: [[VAR1:.*]] = memref.load %[[MEMREF0:.*]][[[VAR0:.*]]] : memref<8xf32> - // VULKAN: [[VAR2:.*]] = memref.load %[[MEMREF1:.*]][[[VAR0:.*]]] : memref<8xf32> + // OPENCL: [[VAR1:%.*]] = memref.load %[[MEMREF0]][[[VAR0]]] : memref<8xf32> + // OPENCL: [[VAR2:%.*]] = memref.load %[[MEMREF1]][[[VAR0]]] : memref<8xf32> + // VULKAN: [[VAR1:%.*]] = memref.load %[[MEMREF0]][[[VAR0]]] : memref<8xf32> + // VULKAN: [[VAR2:%.*]] = memref.load %[[MEMREF1]][[[VAR0]]] : memref<8xf32> %10 = func.call @addf(%8, %9) : (f32, f32) -> f32 memref.store %10, %2[%7] : memref<8xf32> //%11 = func.call @cast(%2) : (memref<8xf32>) -> memref diff --git a/test/Transforms/InsertGpuAllocs/dynamic-dims.mlir b/test/Transforms/InsertGpuAllocs/dynamic-dims.mlir index ccde5f579..5eaa5986e 100644 --- a/test/Transforms/InsertGpuAllocs/dynamic-dims.mlir +++ b/test/Transforms/InsertGpuAllocs/dynamic-dims.mlir @@ -6,12 +6,12 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref) -> memref<2x5xf3 %c2 = arith.constant 2 : index %c1 = arith.constant 1 : index %c5 = arith.constant 5 : index - // OPENCL: [[VAR0:.*]] = memref.dim %arg1, %c0 : memref - // OPENCL: [[VAR1:.*]] = memref.dim %arg1, %c1 : memref - // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared ([[VAR0:.*]], [[VAR1:.*]]) : memref - // VULKAN: [[VAR0:.*]] = memref.dim %arg1, %c0 : memref - // VULKAN: [[VAR1:.*]] = memref.dim %arg1, %c1 : memref - // VULKAN: %[[MEMREF0:.*]] = memref.alloc([[VAR0:.*]], [[VAR1:.*]]) : memref + // OPENCL: [[VAR0:%.*]] = memref.dim %arg1, %c0 : memref + // OPENCL: [[VAR1:%.*]] = memref.dim %arg1, %c1 : memref + // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared ([[VAR0]], [[VAR1]]) : memref + // VULKAN: [[VAR0:%.*]] = memref.dim %arg1, %c0 : memref + // VULKAN: [[VAR1:%.*]] = memref.dim %arg1, %c1 : memref + // VULKAN: %[[MEMREF0:.*]] = memref.alloc([[VAR0]], [[VAR1]]) : memref %0 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32> %c1_0 = arith.constant 1 : index %1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1] diff --git a/test/Transforms/InsertGpuAllocs/memref-alloc-to-gpu-alloc.mlir b/test/Transforms/InsertGpuAllocs/memref-alloc-to-gpu-alloc.mlir index 63a5d8009..ebbbc4f9d 100644 --- a/test/Transforms/InsertGpuAllocs/memref-alloc-to-gpu-alloc.mlir +++ b/test/Transforms/InsertGpuAllocs/memref-alloc-to-gpu-alloc.mlir @@ -9,24 +9,24 @@ func.func @main() { %0 = memref.alloc() : memref<8xf32> %1 = memref.alloc() : memref<8xf32> %2 = memref.alloc() : memref<8xf32> - // OPENCL: %[[MEMREF0:.*]]= gpu.alloc () : memref<8xf32> - // OPENCL: %[[MEMREF1:.*]]= gpu.alloc () : memref<8xf32> - // OPENCL: %[[MEMREF2:.*]]= gpu.alloc () : memref<8xf32> - // VULKAN: %[[MEMREF0:.*]]= memref.alloc() : memref<8xf32> - // VULKAN: %[[MEMREF1:.*]]= memref.alloc() : memref<8xf32> + // OPENCL: %[[MEMREF0:.*]] = gpu.alloc () : memref<8xf32> + // OPENCL: %[[MEMREF1:.*]] = gpu.alloc () : memref<8xf32> + // OPENCL: %[[MEMREF2:.*]] = gpu.alloc () : memref<8xf32> + // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<8xf32> + // VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<8xf32> gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c8, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) { // OPENCL: gpu.launch {{.*}} // VULKAN: gpu.launch {{.*}} %7 = gpu.block_id x - // OPENCL: [[VAR0:.*]] = gpu.block_id x - // VULKAN: [[VAR0:.*]] = gpu.block_id x + // OPENCL: [[VAR0:%.*]] = gpu.block_id x + // VULKAN: [[VAR0:%.*]] = gpu.block_id x %8 = memref.load %0[%7] : memref<8xf32> %9 = memref.load %1[%7] : memref<8xf32> - // OPENCL: [[VAR1:.*]] = memref.load %[[MEMREF0:.*]][[[VAR0:.*]]] : memref<8xf32> - // OPENCL: [[VAR2:.*]] = memref.load %[[MEMREF1:.*]][[[VAR0:.*]]] : memref<8xf32> - // VULKAN: [[VAR1:.*]] = memref.load %[[MEMREF0:.*]][[[VAR0:.*]]] : memref<8xf32> - // VULKAN: [[VAR2:.*]] = memref.load %[[MEMREF1:.*]][[[VAR0:.*]]] : memref<8xf32> + // OPENCL: [[VAR1:%.*]] = memref.load %[[MEMREF0]][[[VAR0]]] : memref<8xf32> + // OPENCL: [[VAR2:%.*]] = memref.load %[[MEMREF1]][[[VAR0]]] : memref<8xf32> + // VULKAN: [[VAR1:%.*]] = memref.load %[[MEMREF0]][[[VAR0]]] : memref<8xf32> + // VULKAN: [[VAR2:%.*]] = memref.load %[[MEMREF1]][[[VAR0]]] : memref<8xf32> %10 = arith.addf %8, %9 : f32 memref.store %10, %2[%7] : memref<8xf32> gpu.terminator diff --git a/test/Transforms/InsertGpuAllocs/memref-get-global.mlir b/test/Transforms/InsertGpuAllocs/memref-get-global.mlir index 7f01305e4..2d94ebfee 100644 --- a/test/Transforms/InsertGpuAllocs/memref-get-global.mlir +++ b/test/Transforms/InsertGpuAllocs/memref-get-global.mlir @@ -15,19 +15,19 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3 %1 = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> %2 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32> - // OPENCL: [[VAR0:.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> + // OPENCL: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> // OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy [[VAR0:.*]], %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32> - // OPENCL: [[VAR1:.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> + // OPENCL: memref.copy [[VAR0]], %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: [[VAR1:%.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> // OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // OPENCL: memref.copy [[VAR1:.*]], %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32> + // OPENCL: memref.copy [[VAR1]], %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> // OPENCL: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32> - // VULKAN: [[VAR0:.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> + // VULKAN: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32> // VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32> - // VULKAN: memref.copy [[VAR0:.*]], %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32> - // VULKAN: [[VAR1:.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> + // VULKAN: memref.copy [[VAR0]], %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32> + // VULKAN: [[VAR1:%.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32> // VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32> - // VULKAN: memref.copy [[VAR1:.*]], %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32> + // VULKAN: memref.copy [[VAR1]], %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32> %c1_0 = arith.constant 1 : index %3 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1] @@ -42,10 +42,10 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3 %7 = memref.load %0[%5, %6] : memref<2x5xf32> %8 = memref.load %1[%5, %6] : memref<2x5xf32> - // OPENCL: [[VAR2:.*]] = memref.load %[[MEMREF0:.*]][%4, %5] : memref<2x5xf32> - // OPENCL: [[VAR3:.*]] = memref.load %[[MEMREF1:.*]][%4, %5] : memref<2x5xf32> - // VULKAN: [[VAR2:.*]] = memref.load %[[MEMREF0:.*]][%4, %5] : memref<2x5xf32> - // VULKAN: [[VAR3:.*]] = memref.load %[[MEMREF1:.*]][%4, %5] : memref<2x5xf32> + // OPENCL: [[VAR2:%.*]] = memref.load %[[MEMREF0]][%4, %5] : memref<2x5xf32> + // OPENCL: [[VAR3:%.*]] = memref.load %[[MEMREF1]][%4, %5] : memref<2x5xf32> + // VULKAN: [[VAR2:%.*]] = memref.load %[[MEMREF0]][%4, %5] : memref<2x5xf32> + // VULKAN: [[VAR3:%.*]] = memref.load %[[MEMREF1]][%4, %5] : memref<2x5xf32> %9 = arith.addf %7, %8 : f32 memref.store %9, %2[%5, %6] : memref<2x5xf32> diff --git a/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir b/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir index 34f1194ff..62af82f4f 100644 --- a/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir +++ b/test/Transforms/InsertGpuAllocs/memref-returned-from-call.mlir @@ -12,17 +12,17 @@ func.func @main() { // OPENCL: func.func @main() %0 = func.call @alloc_buffer() : () -> memref<8xf32> // OPENCL: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<8xf32> - // OPENCL: memref.copy %0, %[[MEMREF:.*]] : memref<8xf32> to memref<8xf32> + // OPENCL: memref.copy %0, %[[MEMREF]] : memref<8xf32> to memref<8xf32> %1 = memref.alloc() : memref<8xf32> %2 = memref.alloc() : memref<8xf32> gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c8, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) { // OPENCL: gpu.launch {{.*}} // VULKAN: gpu.launch {{.*}} %7 = gpu.block_id x - // OPENCL: [[VAR0:.*]] = gpu.block_id x - // VULKAN: [[VAR0:.*]] = gpu.block_id x + // OPENCL: [[VAR0:%.*]] = gpu.block_id x + // VULKAN: [[VAR0:%.*]] = gpu.block_id x - // OPENCL: [[VAR1:.*]] = memref.load %[[MEMREF:.*]][[[VAR0:.*]]] : memref<8xf32> + // OPENCL: [[VAR1:%.*]] = memref.load %[[MEMREF]][[[VAR0]]] : memref<8xf32> %8 = memref.load %0[%7] : memref<8xf32> %9 = memref.load %1[%7] : memref<8xf32> %10 = func.call @addf(%8, %9) : (f32, f32) -> f32