diff --git a/ext/EvoTreesCUDAExt/EvoTreesCUDAExt.jl b/ext/EvoTreesCUDAExt/EvoTreesCUDAExt.jl index 2cc93cd5..8d0f7a8c 100644 --- a/ext/EvoTreesCUDAExt/EvoTreesCUDAExt.jl +++ b/ext/EvoTreesCUDAExt/EvoTreesCUDAExt.jl @@ -4,9 +4,9 @@ using EvoTrees using CUDA # This should be different on CPUs and GPUs -EvoTrees.device_ones(::Type{<:GPU}, ::Type{T}, n::Int) where {T} = CUDA.ones(T, n) -EvoTrees.device_array_type(::Type{<:GPU}) = CuArray -function EvoTrees.post_fit_gc(::Type{<:GPU}) +EvoTrees.device_ones(::Type{<:EvoTrees.GPU}, ::Type{T}, n::Int) where {T} = CUDA.ones(T, n) +EvoTrees.device_array_type(::Type{<:EvoTrees.GPU}) = CuArray +function EvoTrees.post_fit_gc(::Type{<:EvoTrees.GPU}) GC.gc(true) CUDA.reclaim() end diff --git a/ext/EvoTreesCUDAExt/fit.jl b/ext/EvoTreesCUDAExt/fit.jl index 92994b63..d0e12fa9 100644 --- a/ext/EvoTreesCUDAExt/fit.jl +++ b/ext/EvoTreesCUDAExt/fit.jl @@ -1,4 +1,4 @@ -function EvoTrees.grow_evotree!(evotree::EvoTree{L,K}, cache, params::EvoTrees.EvoTypes{L}, ::Type{GPU}) where {L,K} +function EvoTrees.grow_evotree!(evotree::EvoTree{L,K}, cache, params::EvoTrees.EvoTypes{L}, ::Type{<:EvoTrees.GPU}) where {L,K} # compute gradients EvoTrees.update_grads!(cache.∇, cache.pred, cache.y, params) @@ -90,7 +90,7 @@ function grow_tree!( update_hist_gpu!(nodes[n].h, h∇, ∇, x_bin, nodes[n].is, jsg, js) end end - @threads for n ∈ sort(n_current) + Threads.@threads for n ∈ sort(n_current) EvoTrees.update_gains!(nodes[n], js, params, feattypes, monotone_constraints) end end @@ -217,7 +217,7 @@ function grow_otree!( update_hist_gpu!(nodes[n].h, h∇, ∇, x_bin, nodes[n].is, jsg, js) end end - @threads for n ∈ n_current + Threads.@threads for n ∈ n_current EvoTrees.update_gains!(nodes[n], js, params, feattypes, monotone_constraints) end diff --git a/ext/EvoTreesCUDAExt/init.jl b/ext/EvoTreesCUDAExt/init.jl index be23a91d..6a8dfcda 100644 --- a/ext/EvoTreesCUDAExt/init.jl +++ b/ext/EvoTreesCUDAExt/init.jl @@ -1,4 +1,4 @@ -function EvoTrees.init_core(params::EvoTrees.EvoTypes{L}, ::Type{EvoTrees.GPU}, data, fnames, y_train, w, offset) where {L} +function EvoTrees.init_core(params::EvoTrees.EvoTypes{L}, ::Type{<:EvoTrees.GPU}, data, fnames, y_train, w, offset) where {L} # binarize data into quantiles edges, featbins, feattypes = EvoTrees.get_edges(data; fnames, nbins=params.nbins, rng=params.rng) diff --git a/ext/EvoTreesCUDAExt/predict.jl b/ext/EvoTreesCUDAExt/predict.jl index c5bb0fcb..3eaabd77 100644 --- a/ext/EvoTreesCUDAExt/predict.jl +++ b/ext/EvoTreesCUDAExt/predict.jl @@ -33,7 +33,7 @@ end GradientRegression """ function predict_kernel!( - ::Type{L}, + ::Type{<:EvoTrees.GradientRegression}, pred::CuDeviceMatrix{T}, split, feats, @@ -41,7 +41,7 @@ function predict_kernel!( leaf_pred, x_bin, feattypes, -) where {L<:EvoTrees.GradientRegression,T} +) where {T} i = threadIdx().x + (blockIdx().x - 1) * blockDim().x nid = 1 @inbounds if i <= size(pred, 2) @@ -61,7 +61,7 @@ end Logistic """ function predict_kernel!( - ::Type{L}, + ::Type{<:EvoTrees.LogLoss}, pred::CuDeviceMatrix{T}, split, feats, @@ -69,7 +69,7 @@ function predict_kernel!( leaf_pred, x_bin, feattypes, -) where {L<:EvoTrees.LogLoss,T} +) where {T} i = threadIdx().x + (blockIdx().x - 1) * blockDim().x nid = 1 @inbounds if i <= size(pred, 2) @@ -89,7 +89,7 @@ end MLE2P """ function predict_kernel!( - ::Type{L}, + ::Type{<:EvoTrees.MLE2P}, pred::CuDeviceMatrix{T}, split, feats, @@ -97,7 +97,7 @@ function predict_kernel!( leaf_pred, x_bin, feattypes, -) where {L<:EvoTrees.MLE2P,T} +) where {T} i = threadIdx().x + (blockIdx().x - 1) * blockDim().x nid = 1 @inbounds if i <= size(pred, 2) @@ -165,13 +165,13 @@ end function predict( m::EvoTree{L,K}, data, - ::Type{GPU}; + ::Type{<:EvoTrees.GPU}; ntree_limit=length(m.trees)) where {L,K} pred = CUDA.zeros(K, size(data, 1)) ntrees = length(m.trees) ntree_limit > ntrees && error("ntree_limit is larger than number of trees $ntrees.") - x_bin = CuArray(binarize(data; fnames=m.info[:fnames], edges=m.info[:edges])) + x_bin = CuArray(EvoTrees.binarize(data; fnames=m.info[:fnames], edges=m.info[:edges])) feattypes = CuArray(m.info[:feattypes]) for i = 1:ntree_limit EvoTrees.predict!(pred, m.trees[i], x_bin, feattypes)