From e47e804cec2ccab94953513c664c7ac085b9082c Mon Sep 17 00:00:00 2001
From: "jeremie.desgagne.bouchard" <jeremie.desgagne.bouchard@gmail.com>
Date: Wed, 25 Oct 2023 02:31:52 -0400
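Subject: [PATCH] replace @spawn with @threads in split_set_threads!

Both parallel loops over blocks in split_set_threads! (src/fit-utils.jl)
now use `@threads for` instead of `@sync` + `@spawn`.

Benchmark scripts are also adjusted: the CUDA import and the GPU sections
are commented out in benchmarks/regressor.jl and benchmarks/regressor-df.jl,
the XGBoost section is commented out in benchmarks/regressor.jl, and nobs
is raised from 1e6 to 1e7 in benchmarks/regressor-df.jl.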

---
 benchmarks/regressor-df.jl | 24 ++++++-------
 benchmarks/regressor.jl    | 70 +++++++++++++++++++-------------------
 src/fit-utils.jl           | 63 ++++++++++++++++------------------
 3 files changed, 76 insertions(+), 81 deletions(-)

diff --git a/benchmarks/regressor-df.jl b/benchmarks/regressor-df.jl
index 19a23017..aa698ba6 100644
--- a/benchmarks/regressor-df.jl
+++ b/benchmarks/regressor-df.jl
@@ -6,9 +6,9 @@ using EvoTrees
 using DataFrames
 using BenchmarkTools
 using Random: seed!
-import CUDA
+# import CUDA
 
-nobs = Int(1e6)
+nobs = Int(1e7)
 num_feat = Int(100)
 nrounds = 200
 T = Float64
@@ -94,13 +94,13 @@ device = "cpu"
 @time pred_evo = m_evo(dtrain);
 @btime m_evo($dtrain);
 
-@info "EvoTrees GPU"
-device = "gpu"
-@info "train"
-@time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
-@time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
-# @btime m_evo = fit_evotree($params_evo, $dtrain; target_name, device);
-# @btime fit_evotree($params_evo, $dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
-@info "predict"
-@time pred_evo = m_evo(dtrain; device);
-@btime m_evo($dtrain; device);
+# @info "EvoTrees GPU"
+# device = "gpu"
+# @info "train"
+# @time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
+# @time m_evo = fit_evotree(params_evo, dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
+# # @btime m_evo = fit_evotree($params_evo, $dtrain; target_name, device);
+# # @btime fit_evotree($params_evo, $dtrain; target_name, deval=dtrain, metric=metric_evo, device, verbosity, print_every_n=100);
+# @info "predict"
+# @time pred_evo = m_evo(dtrain; device);
+# @btime m_evo($dtrain; device);
diff --git a/benchmarks/regressor.jl b/benchmarks/regressor.jl
index 5e20a326..ebc00926 100644
--- a/benchmarks/regressor.jl
+++ b/benchmarks/regressor.jl
@@ -6,7 +6,7 @@ using XGBoost
 using EvoTrees
 using BenchmarkTools
 using Random: seed!
-import CUDA
+# import CUDA
 
 ### v.0.15.1
 # desktop | 1e6 | depth 11 | cpu: 37.2s
@@ -45,27 +45,27 @@ elseif loss == "logloss"
     metric_evo = :logloss
 end
 
-@info "XGBoost"
-@info "train"
-params_xgb = Dict(
-    :num_round => nrounds,
-    :max_depth => max_depth - 1,
-    :eta => 0.05,
-    :objective => loss_xgb,
-    :print_every_n => 5,
-    :subsample => 0.5,
-    :colsample_bytree => 0.5,
-    :tree_method => "hist", # hist/gpu_hist
-    :max_bin => 64,
-)
+# @info "XGBoost"
+# @info "train"
+# params_xgb = Dict(
+#     :num_round => nrounds,
+#     :max_depth => max_depth - 1,
+#     :eta => 0.05,
+#     :objective => loss_xgb,
+#     :print_every_n => 5,
+#     :subsample => 0.5,
+#     :colsample_bytree => 0.5,
+#     :tree_method => "hist", # hist/gpu_hist
+#     :max_bin => 64,
+# )
 
-dtrain = DMatrix(x_train, y_train)
-watchlist = Dict("train" => DMatrix(x_train, y_train));
-@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...);
-# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
-@info "predict"
-@time pred_xgb = XGBoost.predict(m_xgb, x_train);
-# @btime XGBoost.predict($m_xgb, $x_train);
+# dtrain = DMatrix(x_train, y_train)
+# watchlist = Dict("train" => DMatrix(x_train, y_train));
+# @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...);
+# # @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
+# @info "predict"
+# @time pred_xgb = XGBoost.predict(m_xgb, x_train);
+# # @btime XGBoost.predict($m_xgb, $x_train);
 
 # @info "lightgbm train:"
 # m_gbm = LGBMRegression(
@@ -135,17 +135,17 @@ device = "cpu"
 @time pred_evo = m_evo(x_train);
 # @btime m_evo($x_train);
 
-@info "EvoTrees GPU"
-device = "gpu"
-# @info "train - no eval"
-# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
-# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
-@info "train - eval"
-CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
-CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
-# @time m_evo = fit_evotree(params_evo; x_train, y_train);
-# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo, device, verbosity);
-@info "predict"
-CUDA.@time pred_evo = m_evo(x_train; device);
-CUDA.@time pred_evo = m_evo(x_train; device);
-# @btime m_evo($x_train; device);
+# @info "EvoTrees GPU"
+# device = "gpu"
+# # @info "train - no eval"
+# # CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
+# # CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
+# @info "train - eval"
+# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
+# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
+# # @time m_evo = fit_evotree(params_evo; x_train, y_train);
+# # @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo, device, verbosity);
+# @info "predict"
+# CUDA.@time pred_evo = m_evo(x_train; device);
+# CUDA.@time pred_evo = m_evo(x_train; device);
+# # @btime m_evo($x_train; device);
diff --git a/src/fit-utils.jl b/src/fit-utils.jl
index a29e2808..8ea83790 100644
--- a/src/fit-utils.jl
+++ b/src/fit-utils.jl
@@ -175,45 +175,40 @@ function split_set_threads!(
     lefts = zeros(Int, nblocks)
     rights = zeros(Int, nblocks)
 
-    @sync begin
-        for bid = 1:nblocks
-            @spawn begin
-                lefts[bid], rights[bid] = split_set_chunk!(
-                    left,
-                    right,
-                    is,
-                    bid,
-                    nblocks,
-                    x_bin,
-                    feat,
-                    cond_bin,
-                    feattype,
-                    offset,
-                    chunk_size,
-                )
-            end
-        end
+    @threads for bid = 1:nblocks
+        lefts[bid], rights[bid] = split_set_chunk!(
+            left,
+            right,
+            is,
+            bid,
+            nblocks,
+            x_bin,
+            feat,
+            cond_bin,
+            feattype,
+            offset,
+            chunk_size,
+        )
     end
 
     sum_lefts = sum(lefts)
     cumsum_lefts = cumsum(lefts)
     cumsum_rights = cumsum(rights)
-    @sync begin
-        for bid = 1:nblocks
-            @spawn split_views_kernel!(
-                out,
-                left,
-                right,
-                bid,
-                offset,
-                chunk_size,
-                lefts,
-                rights,
-                sum_lefts,
-                cumsum_lefts,
-                cumsum_rights,
-            )
-        end
+
+    @threads for bid = 1:nblocks
+        split_views_kernel!(
+            out,
+            left,
+            right,
+            bid,
+            offset,
+            chunk_size,
+            lefts,
+            rights,
+            sum_lefts,
+            cumsum_lefts,
+            cumsum_rights,
+        )
     end
 
     return (