From e47e804cec2ccab94953513c664c7ac085b9082c Mon Sep 17 00:00:00 2001 From: "jeremie.desgagne.bouchard" Date: Wed, 25 Oct 2023 02:31:52 -0400 Subject: [PATCH] replace @spawn with @threads --- benchmarks/regressor-df.jl | 24 ++++++------- benchmarks/regressor.jl | 70 +++++++++++++++++++------------------- src/fit-utils.jl | 63 ++++++++++++++++------------------ 3 files changed, 76 insertions(+), 81 deletions(-) diff --git a/benchmarks/regressor-df.jl b/benchmarks/regressor-df.jl index 19a23017..aa698ba6 100644 --- a/benchmarks/regressor-df.jl +++ b/benchmarks/regressor-df.jl @@ -6,9 +6,9 @@ using EvoTrees using DataFrames using BenchmarkTools using Random: seed! -import CUDA +# import CUDA -nobs = Int(1e6) +nobs = Int(1e7) num_feat = Int(100) nrounds = 200 T = Float64 @@ -94,13 +94,13 @@ device = "cpu" @time pred_evo = m_evo(dtrain); @btime m_evo($dtrain); -@info "EvoTrees GPU" -device = "gpu" -@info "train" -@time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); -@time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); -# @btime m_evo = fit_evotree($params_evo, $dtrain; target_name, device); -# @btime fit_evotree($params_evo, $dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); -@info "predict" -@time pred_evo = m_evo(dtrain; device); -@btime m_evo($dtrain; device); +# @info "EvoTrees GPU" +# device = "gpu" +# @info "train" +# @time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); +# @time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); +# # @btime m_evo = fit_evotree($params_evo, $dtrain; target_name, device); +# # @btime fit_evotree($params_evo, $dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100); +# @info "predict" +# @time pred_evo = m_evo(dtrain; device); +# @btime m_evo($dtrain; device); diff --git a/benchmarks/regressor.jl b/benchmarks/regressor.jl index 5e20a326..ebc00926 100644 --- a/benchmarks/regressor.jl +++ b/benchmarks/regressor.jl @@ -6,7 +6,7 @@ using XGBoost using EvoTrees using BenchmarkTools using Random: seed! -import CUDA +# import CUDA ### v.0.15.1 # desktop | 1e6 | depth 11 | cpu: 37.2s @@ -45,27 +45,27 @@ elseif loss == "logloss" metric_evo = :logloss end -@info "XGBoost" -@info "train" -params_xgb = Dict( - :num_round => nrounds, - :max_depth => max_depth - 1, - :eta => 0.05, - :objective => loss_xgb, - :print_every_n => 5, - :subsample => 0.5, - :colsample_bytree => 0.5, - :tree_method => "hist", # hist/gpu_hist - :max_bin => 64, -) +# @info "XGBoost" +# @info "train" +# params_xgb = Dict( +# :num_round => nrounds, +# :max_depth => max_depth - 1, +# :eta => 0.05, +# :objective => loss_xgb, +# :print_every_n => 5, +# :subsample => 0.5, +# :colsample_bytree => 0.5, +# :tree_method => "hist", # hist/gpu_hist +# :max_bin => 64, +# ) -dtrain = DMatrix(x_train, y_train) -watchlist = Dict("train" => DMatrix(x_train, y_train)); -@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...); -# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...); -@info "predict" -@time pred_xgb = XGBoost.predict(m_xgb, x_train); -# @btime XGBoost.predict($m_xgb, $x_train); +# dtrain = DMatrix(x_train, y_train) +# watchlist = Dict("train" => DMatrix(x_train, y_train)); +# @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...); +# # @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...); +# @info "predict" +# @time pred_xgb = XGBoost.predict(m_xgb, x_train); +# # @btime XGBoost.predict($m_xgb, $x_train); # @info "lightgbm train:" # m_gbm = LGBMRegression( @@ -135,17 +135,17 @@ device = "cpu" @time pred_evo = m_evo(x_train); # @btime m_evo($x_train); -@info "EvoTrees GPU" -device = "gpu" -# @info "train - no eval" -# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100); -# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100); -@info "train - eval" -CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100); -CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100); -# @time m_evo = fit_evotree(params_evo; x_train, y_train); -# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo, device, verbosity); -@info "predict" -CUDA.@time pred_evo = m_evo(x_train; device); -CUDA.@time pred_evo = m_evo(x_train; device); -# @btime m_evo($x_train; device); +# @info "EvoTrees GPU" +# device = "gpu" +# # @info "train - no eval" +# # CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100); +# # CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100); +# @info "train - eval" +# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100); +# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100); +# # @time m_evo = fit_evotree(params_evo; x_train, y_train); +# # @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo, device, verbosity); +# @info "predict" +# CUDA.@time pred_evo = m_evo(x_train; device); +# CUDA.@time pred_evo = m_evo(x_train; device); +# # @btime m_evo($x_train; device); diff --git a/src/fit-utils.jl b/src/fit-utils.jl index a29e2808..8ea83790 100644 --- a/src/fit-utils.jl +++ b/src/fit-utils.jl @@ -175,45 +175,40 @@ function split_set_threads!( lefts = zeros(Int, nblocks) rights = zeros(Int, nblocks) - @sync begin - for bid = 1:nblocks - @spawn begin - lefts[bid], rights[bid] = split_set_chunk!( - left, - right, - is, - bid, - nblocks, - x_bin, - feat, - cond_bin, - feattype, - offset, - chunk_size, - ) - end - end + @threads for bid = 1:nblocks + lefts[bid], rights[bid] = split_set_chunk!( + left, + right, + is, + bid, + nblocks, + x_bin, + feat, + cond_bin, + feattype, + offset, + chunk_size, + ) end sum_lefts = sum(lefts) cumsum_lefts = cumsum(lefts) cumsum_rights = cumsum(rights) - @sync begin - for bid = 1:nblocks - @spawn split_views_kernel!( - out, - left, - right, - bid, - offset, - chunk_size, - lefts, - rights, - sum_lefts, - cumsum_lefts, - cumsum_rights, - ) - end + + @threads for bid = 1:nblocks + split_views_kernel!( + out, + left, + right, + bid, + offset, + chunk_size, + lefts, + rights, + sum_lefts, + cumsum_lefts, + cumsum_rights, + ) end return (