Skip to content

Commit

Permalink
[InsertGPUAllocs] Use gpu.memcpy for opencl instead of memref.copy
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreyPavlenko committed Aug 30, 2024
1 parent 5c647e7 commit 198bc35
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 10 deletions.
15 changes: 10 additions & 5 deletions lib/Transforms/InsertGPUAllocs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,10 @@ class InsertGPUAllocsPass final
auto newAlloc = builder.create<mlir::memref::AllocOp>(
loc, alloc.getType(), alloc.getDynamicSizes(),
alloc.getSymbolOperands());
- builder.create<mlir::memref::CopyOp>(loc, allocResult,
- newAlloc.getResult());
+ builder.create<mlir::gpu::MemcpyOp>(
+ loc, /*asyncToken*/ static_cast<mlir::Type>(nullptr),
+ /*asyncDependencies*/ std::nullopt, newAlloc.getResult(),
+ allocResult);
use.set(newAlloc.getResult());
}
}
Expand Down Expand Up @@ -401,8 +403,9 @@ class InsertGPUAllocsPass final
/*symbolOperands*/ std::nullopt, hostShared);
auto allocResult = gpuAlloc.getResult(0);
if (access.hostWrite && access.deviceRead) {
- auto copy =
- builder.create<mlir::memref::CopyOp>(loc, op, allocResult);
+ auto copy = builder.create<mlir::gpu::MemcpyOp>(
+ loc, /*asyncToken*/ static_cast<mlir::Type>(nullptr),
+ /*asyncDependencies*/ std::nullopt, allocResult, op);
filter.insert(copy);
}

Expand All @@ -421,7 +424,9 @@ class InsertGPUAllocsPass final
op.replaceAllUsesExcept(allocResult, filter);
builder.setInsertionPoint(term);
if (access.hostRead && access.deviceWrite) {
- builder.create<mlir::memref::CopyOp>(loc, allocResult, op);
+ builder.create<mlir::gpu::MemcpyOp>(
+ loc, /*asyncToken*/ static_cast<mlir::Type>(nullptr),
+ /*asyncDependencies*/ std::nullopt, op, allocResult);
}
builder.create<mlir::gpu::DeallocOp>(loc, std::nullopt, allocResult);
}
Expand Down
4 changes: 2 additions & 2 deletions test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3
%c1 = arith.constant 1 : index
%c5 = arith.constant 5 : index
// OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
- // OPENCL: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32>
+ // OPENCL: gpu.memcpy %[[MEMREF0]], %arg1 : memref<2x5xf32>, memref<2x5xf32>
// OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
- // OPENCL: memref.copy %arg0, %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32>
+ // OPENCL: gpu.memcpy %[[MEMREF1]], %arg0 : memref<2x5xf32>, memref<2x5xf32>
// VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32>
// VULKAN: memref.copy %arg1, %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32>
// VULKAN: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32>
Expand Down
4 changes: 2 additions & 2 deletions test/Transforms/InsertGpuAllocs/memref-get-global.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3

// OPENCL: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32>
// OPENCL: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
- // OPENCL: memref.copy [[VAR0]], %[[MEMREF0]] : memref<2x5xf32> to memref<2x5xf32>
+ // OPENCL: gpu.memcpy %[[MEMREF0]], [[VAR0]] : memref<2x5xf32>, memref<2x5xf32>
// OPENCL: [[VAR1:%.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32>
// OPENCL: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
- // OPENCL: memref.copy [[VAR1]], %[[MEMREF1]] : memref<2x5xf32> to memref<2x5xf32>
+ // OPENCL: gpu.memcpy %[[MEMREF1]], [[VAR1]] : memref<2x5xf32>, memref<2x5xf32>
// OPENCL: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
// VULKAN: [[VAR0:%.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32>
// VULKAN: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func.func @main() {
// OPENCL: func.func @main()
%0 = func.call @alloc_buffer() : () -> memref<8xf32>
// OPENCL: %[[MEMREF:.*]] = gpu.alloc host_shared () : memref<8xf32>
- // OPENCL: memref.copy %0, %[[MEMREF]] : memref<8xf32> to memref<8xf32>
+ // OPENCL: gpu.memcpy %[[MEMREF]], %0 : memref<8xf32>, memref<8xf32>
%1 = memref.alloc() : memref<8xf32>
%2 = memref.alloc() : memref<8xf32>
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c8, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
Expand Down

0 comments on commit 198bc35

Please sign in to comment.