Skip to content

Commit

Permalink
removing heuristic
Browse files Browse the repository at this point in the history
  • Loading branch information
leios committed Jul 25, 2024
1 parent 3560049 commit 0c7e26b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 60 deletions.
1 change: 0 additions & 1 deletion src/GPUArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ using Reexport
@reexport using GPUArraysCore

## executed on-device
include("device/execution.jl")
include("device/abstractarray.jl")

using KernelAbstractions
Expand Down
39 changes: 0 additions & 39 deletions src/device/execution.jl

This file was deleted.

26 changes: 6 additions & 20 deletions src/host/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,28 +117,14 @@ function Base.map!(f, dest::AnyGPUArray, xs::AbstractArray...)
end

# element-wise map kernel
@kernel function map_kernel(dest, bc, nelem, common_length)

j = 0
J = @index(Global, Linear)
for i in 1:nelem
j += 1
if j <= common_length

J_c = CartesianIndices(axes(bc))[(J-1)*nelem + j]
@inbounds dest[J_c] = bc[J_c]
end
end
# Element-wise kernel: each work-item reads one element of `bc` at its global
# linear index and stores it at the same index of `dest`. There is no loop, so
# one work-item handles exactly one element.
@kernel function map_kernel(dest, bc)
    idx = @index(Global, Linear)
    # Bounds checks are elided here, so the launch range must only cover valid
    # indices. NOTE(review): presumably the ndrange equals the common length of
    # the inputs -- confirm against the launch site.
    @inbounds dest[idx] = bc[idx]
end
elements = common_length
elements_per_thread = typemax(Int)

kernel = map_kernel(get_backend(dest))
heuristic = launch_heuristic(get_backend(dest), kernel, dest, bc, 1,
common_length; elements, elements_per_thread)
config = launch_configuration(get_backend(dest), heuristic;
elements, elements_per_thread)
kernel(dest, bc, config.elements_per_thread,
common_length; ndrange = config.threads)
config = KernelAbstractions.launch_config(kernel, common_length, nothing)
kernel(dest, bc; ndrange = config[1], workgroupsize = config[2])

if eltype(dest) <: BrokenBroadcast
throw(ArgumentError("Map operation resulting in $(eltype(eltype(dest))) is not GPU compatible"))
Expand Down

0 comments on commit 0c7e26b

Please sign in to comment.