diff --git a/benchmarks/regressor.jl b/benchmarks/regressor.jl
index 245a502..e7a2c43 100644
--- a/benchmarks/regressor.jl
+++ b/benchmarks/regressor.jl
@@ -1,14 +1,12 @@
-using Revise
+using CUDA
 using DataFrames
 using CSV
 using Statistics
 using StatsBase: sample
 using XGBoost
-# using LightGBM
 using EvoTrees
 using BenchmarkTools
 using Random: seed!
-import CUDA
 
 ### v.0.15.1
 # desktop | 1e6 | depth 11 | cpu: 37.2s
@@ -18,28 +16,42 @@ import CUDA
 # desktop | 1e6 | depth 11 | cpu: 31s gpu: 50 sec  | xgboost cpu: 26s
 # desktop | 10e6 | depth 11 | cpu 200s gpu: 80 sec | xgboost cpu: 267s
 
-#threads
-# laptop depth 6: 12.717845 seconds (2.08 M allocations: 466.228 MiB)
-
-# for device in ["cpu", "gpu"]
-#     for nobs in Int.([1e5, 1e6, 1e7])
-#         for nfeats in [10, 100]
-#             for max_depth in [6, 11]
-
-df = DataFrame()
-device = "gpu"
-for device in [device]
-    for nobs in Int.([1e5, 1e6, 1e7])
-        for nfeats in [10, 100]
-            for max_depth in [6, 11]
-
-                # nobs = Int(1e6)
-                # nfeats = Int(100)
-                # max_depth = 6
-                max_nrounds = 200
-                tree_type = "binary"
-                T = Float64
-                nthreads = Base.Threads.nthreads()
+### gpu-hist
+# desktop | 1e6 | depth 11 | cpu: Xs gpu: Xs  | xgboost cpu: Xs
+# desktop | 10e6 | depth 11 | cpu: Xs gpu: Xs | xgboost cpu: Xs
+
+run_evo = true
+run_xgb = true
+max_nrounds = 200
+tree_type = "binary"
+T = Float64
+nthreads = Base.Threads.nthreads()
+
+device_list = ["cpu", "gpu"]
+# device_list = ["gpu"]
+
+nobs_list = Int.([1e5, 1e6, 1e7])
+# nobs_list = Int.([1e4, 1e5])
+
+nfeats_list = [10, 100]
+# nfeats_list = [10]
+
+max_depth_list = [6, 11]
+# max_depth_list = [6]
+
+
+for device in device_list
+    df = DataFrame()
+    for nobs in nobs_list
+        for nfeats in nfeats_list
+            for max_depth in max_depth_list
+
+                _df = DataFrame(
+                    :device => device,
+                    :nobs => nobs,
+                    :nfeats => nfeats,
+                    :max_depth => max_depth)
+
                 @info "device: $device | nobs: $nobs | nfeats: $nfeats | max_depth : $max_depth | nthreads: $nthreads | tree_type : $tree_type"
                 seed!(123)
                 x_train = rand(T, nobs, nfeats)
@@ -59,104 +71,73 @@ for device in [device]
                 end
                 tree_method = device == "gpu" ? "gpu_hist" : "hist"
 
-                @info "XGBoost"
-                params_xgb = Dict(
-                    :num_round => max_nrounds,
-                    :max_depth => max_depth - 1,
-                    :eta => 0.05,
-                    :objective => loss_xgb,
-                    :print_every_n => 5,
-                    :subsample => 0.5,
-                    :colsample_bytree => 0.5,
-                    :tree_method => tree_method, # hist/gpu_hist
-                    :max_bin => 64,
-                )
-
-                @info "train"
-                dtrain = DMatrix(x_train, y_train)
-                watchlist = Dict("train" => DMatrix(x_train, y_train))
-                # m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
-                t_train_xgb = @elapsed m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
-                # @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric = metric_xgb, params_xgb...);
-                @info "predict"
-                # pred_xgb = XGBoost.predict(m_xgb, x_train)
-                # t_infer_xgb = @elapsed pred_xgb = XGBoost.predict(m_xgb, x_train)
-                t_infer_xgb = missing
-                # @btime XGBoost.predict($m_xgb, $x_train);
-
-                # @info "lightgbm train:"
-                # m_gbm = LGBMRegression(
-                #     objective = "regression",
-                #     boosting = "gbdt",
-                #     num_iterations = 200,
-                #     learning_rate = 0.05,
-                #     num_leaves = 256,
-                #     max_depth = 5,
-                #     tree_learner = "serial",
-                #     num_threads = Sys.CPU_THREADS,
-                #     histogram_pool_size = -1.,
-                #     min_data_in_leaf = 1,
-                #     min_sum_hessian_in_leaf = 0,
-                #     max_delta_step = 0,
-                #     min_gain_to_split = 0,
-                #     feature_fraction = 0.5,
-                #     feature_fraction_seed = 2,
-                #     bagging_fraction = 0.5,
-                #     bagging_freq = 1,
-                #     bagging_seed = 3,
-                #     max_bin = 64,
-                #     bin_construct_sample_cnt = 200000,
-                #     data_random_seed = 1,
-                #     is_sparse = false,
-                #     min_data_per_group = 1,
-                #     metric = ["mae"],
-                #     metric_freq = 10,
-                #     # early_stopping_round = 10,
-                # )
-                # @time gbm_results = fit!(m_gbm, x_train, y_train, (x_train, y_train))
-                # @time pred_gbm = LightGBM.predict(m_gbm, x_train) |> vec
-
-                @info "EvoTrees"
-                verbosity = 1
-
-                params_evo = EvoTreeRegressor(;
-                    loss=loss_evo,
-                    nrounds=max_nrounds,
-                    alpha=0.5,
-                    lambda=0.0,
-                    gamma=0.0,
-                    eta=0.05,
-                    max_depth=max_depth,
-                    min_weight=1.0,
-                    rowsample=0.5,
-                    colsample=0.5,
-                    nbins=64,
-                    tree_type,
-                    rng=123
-                )
-
-                @info "train - eval"
-                @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
-                t_train_evo = @elapsed m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
-
-                @info "predict"
-                @time pred_evo = m_evo(x_train; device)
-                t_infer_evo = @elapsed pred_evo = m_evo(x_train; device)
+                if run_evo
+                    @info "EvoTrees"
+                    verbosity = 1
+
+                    params_evo = EvoTreeRegressor(;
+                        loss=loss_evo,
+                        nrounds=max_nrounds,
+                        alpha=0.5,
+                        lambda=0.0,
+                        gamma=0.0,
+                        eta=0.05,
+                        max_depth=max_depth,
+                        min_weight=1.0,
+                        rowsample=0.5,
+                        colsample=0.5,
+                        nbins=64,
+                        tree_type,
+                        rng=123
+                    )
+
+                    @info "train - eval"
+                    @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100)
+                    t_train_evo = @elapsed m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100)
+
+                    @info "predict"
+                    @time pred_evo = m_evo(x_train; device)
+                    t_infer_evo = @elapsed pred_evo = m_evo(x_train; device)
+
+                    _df = hcat(_df, DataFrame(
+                        :train_evo => t_train_evo,
+                        :infer_evo => t_infer_evo)
+                    )
+                end
 
-                _df = DataFrame(
-                    :device => device,
-                    :nobs => nobs,
-                    :nfeats => nfeats,
-                    :max_depth => max_depth,
-                    :train_evo => t_train_evo,
-                    :train_xgb => t_train_xgb,
-                    :infer_evo => t_infer_evo,
-                    :infer_xgb => t_infer_xgb)
+                if run_xgb
+                    @info "XGBoost"
+                    params_xgb = Dict(
+                        :num_round => max_nrounds,
+                        :max_depth => max_depth - 1,
+                        :eta => 0.05,
+                        :objective => loss_xgb,
+                        :print_every_n => 5,
+                        :subsample => 0.5,
+                        :colsample_bytree => 0.5,
+                        :tree_method => tree_method, # hist/gpu_hist
+                        :max_bin => 64,
+                    )
+
+                    @info "train"
+                    dtrain = DMatrix(x_train, y_train)
+                    watchlist = Dict("train" => DMatrix(x_train, y_train))
+                    m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
+                    t_train_xgb = @elapsed m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
+                    @info "predict"
+                    pred_xgb = XGBoost.predict(m_xgb, x_train)
+                    t_infer_xgb = @elapsed pred_xgb = XGBoost.predict(m_xgb, x_train)
+
+                    _df = hcat(_df, DataFrame(
+                        :train_xgb => t_train_xgb,
+                        :infer_xgb => t_infer_xgb)
+                    )
+                end
                 append!(df, _df)
             end
         end
     end
+    select!(df, Cols(:device, :nobs, :nfeats, :max_depth, r"train_", r"infer_"))
+    path = joinpath(@__DIR__, "results", "regressor-$device.csv")
+    CSV.write(path, df)
 end
-
-path = joinpath(@__DIR__, "regressor-$device.csv")
-CSV.write(path, df)
diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/results/regressor-cpu.csv b/benchmarks/results/regressor-cpu.csv
new file mode 100644
index 0000000..331df2a
--- /dev/null
+++ b/benchmarks/results/regressor-cpu.csv
@@ -0,0 +1,13 @@
+device,nobs,nfeats,max_depth,train_evo,train_xgb,infer_evo,infer_xgb
+cpu,100000,10,6,0.472619528,0.751935788,0.038590839,0.043529276
+cpu,100000,10,11,5.489825366,1.128379095,0.082319508,0.088952176
+cpu,100000,100,6,0.988079622,1.518898589,0.099269867,0.169307562
+cpu,100000,100,11,17.393989357,3.653523911,0.099832957,0.191934632
+cpu,1000000,10,6,2.221653865,6.872771486,0.262242186,0.305962199
+cpu,1000000,10,11,10.968204915,8.351607907,0.641649869,0.635454124
+cpu,1000000,100,6,6.344018332,14.065929477,0.658609287,1.706589588
+cpu,1000000,100,11,29.885696445,19.298177012,1.198121937,2.072346858
+cpu,10000000,10,6,26.61495224,87.244083827,2.711196986,3.1358349
+cpu,10000000,10,11,60.957899261,115.66527993,6.336312816,6.502272966
+cpu,10000000,100,6,72.995730549,143.385259272,5.893038023,16.293621025
+cpu,10000000,100,11,198.913320569,190.394824939,12.254412707,20.239121901
diff --git a/benchmarks/results/regressor-gpu.csv b/benchmarks/results/regressor-gpu.csv
new file mode 100644
index 0000000..bc2c650
--- /dev/null
+++ b/benchmarks/results/regressor-gpu.csv
@@ -0,0 +1,13 @@
+device,nobs,nfeats,max_depth,train_evo,train_xgb,infer_evo,infer_xgb
+gpu,100000,10,6,0.862532855,0.318666888,0.04513749,0.013406431
+gpu,100000,10,11,13.501736503,1.427816958,0.082528616,0.024396364
+gpu,100000,100,6,1.48735084,0.60662158,0.067809115,0.184014945
+gpu,100000,100,11,27.945221285,3.465269667,0.106064917,0.192947498
+gpu,1000000,10,6,1.559561907,1.023686005,0.264465313,0.176634169
+gpu,1000000,10,11,19.957096339,2.947736318,0.642042855,0.237144095
+gpu,1000000,100,6,3.135773796,2.845227668,0.581958139,1.796915351
+gpu,1000000,100,11,38.017932344,8.1506029,1.144244461,2.073349481
+gpu,10000000,10,6,8.046695215,7.242176552,2.890508713,1.886053569
+gpu,10000000,10,11,35.250304318,12.7092334,6.110619095,2.145757661
+gpu,10000000,100,6,20.482723867,25.419202052,5.917664697,17.553081322
+gpu,10000000,100,11,68.048511202,48.709503258,12.087723485,20.207581819