new benchmarks
jeremiedb committed Nov 20, 2023
1 parent 8e3c47a commit 8c452b6
Showing 3 changed files with 166 additions and 129 deletions.
13 changes: 13 additions & 0 deletions benchmarks/regressor-cpu.csv
@@ -0,0 +1,13 @@
device,nobs,nfeats,max_depth,train_evo,train_xgb,infer_evo,infer_xgb
cpu,100000,10,6,0.571302287,0.931940322,0.045015373,0.06524121
cpu,100000,10,11,6.11657148,1.161374098,0.084641913,0.088485924
cpu,100000,100,6,0.961452175,1.592773516,0.115538193,0.17662686
cpu,100000,100,11,17.849992393,3.853003881,0.121810382,0.263126441
cpu,1000000,10,6,2.383909891,6.795574553,0.264884995,0.361242933
cpu,1000000,10,11,11.161634211,9.32421656,0.794897806,0.714718326
cpu,1000000,100,6,6.020354898,15.24576219,0.776566288,1.873882103
cpu,1000000,100,11,30.228386878,19.789690573,1.154914415,2.376135847
cpu,10000000,10,6,27.248707143,88.515808528,2.812803051,3.182972925
cpu,10000000,10,11,62.273960418,116.989930049,6.267709913,6.473606844
cpu,10000000,100,6,71.912413469,150.482093149,6.636390315,16.507357233
cpu,10000000,100,11,196.855192543,194.620105633,12.155773288,24.468722938
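
The timing columns are wall times in seconds (recorded with @elapsed in the script below). To compare the two engines per configuration, a minimal sketch in Julia — the speedup column names are illustrative additions, not part of the benchmark script:

using CSV, DataFrames

# load the CPU results written by benchmarks/regressor.jl
cpu = CSV.read("benchmarks/regressor-cpu.csv", DataFrame)

# ratio > 1 means XGBoost took longer than EvoTrees on that configuration
cpu.train_speedup = cpu.train_xgb ./ cpu.train_evo
cpu.infer_speedup = cpu.infer_xgb ./ cpu.infer_evo

show(cpu[:, [:nobs, :nfeats, :max_depth, :train_speedup, :infer_speedup]], allrows=true)

On this grid, EvoTrees trains faster at max_depth = 6 across the board, while XGBoost is generally the faster trainer at max_depth = 11 on CPU.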
13 changes: 13 additions & 0 deletions benchmarks/regressor-gpu.csv
@@ -0,0 +1,13 @@
device,nobs,nfeats,max_depth,train_evo,train_xgb,infer_evo,infer_xgb
gpu,100000,10,6,1.009510498,0.653007059,0.044266191,
gpu,100000,10,11,16.381585168,1.498272035,0.083979915,
gpu,100000,100,6,1.686124929,0.697764277,0.075100306,
gpu,100000,100,11,31.022717789,3.686352557,0.111697012,
gpu,1000000,10,6,1.715696068,1.146328262,0.362810704,
gpu,1000000,10,11,24.533893695,3.147383026,0.796694699,
gpu,1000000,100,6,3.450828332,3.24444115,0.633558641,
gpu,1000000,100,11,43.326096264,8.691848273,1.184580648,
gpu,10000000,10,6,7.573575617,8.384329387,3.870231138,
gpu,10000000,10,11,38.915609007,12.824271885,6.358167029,
gpu,10000000,100,6,20.566461348,26.477476765,6.024578283,
gpu,10000000,100,11,71.042451127,55.537101359,12.329862253,
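
The empty infer_xgb field in the GPU file matches the script below, where t_infer_xgb = missing for this run; CSV.write leaves missing values blank.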
269 changes: 140 additions & 129 deletions benchmarks/regressor.jl
@@ -1,12 +1,14 @@
using Revise
using DataFrames
using CSV
using Statistics
using StatsBase: sample
using XGBoost
# using LightGBM
using EvoTrees
using BenchmarkTools
using Random: seed!
# import CUDA
import CUDA

### v.0.15.1
# desktop | 1e6 | depth 11 | cpu: 37.2s
@@ -19,133 +21,142 @@ using Random: seed!
#threads
# laptop depth 6: 12.717845 seconds (2.08 M allocations: 466.228 MiB)

nobs = Int(1e6)
num_feat = Int(100)
nrounds = 200
max_depth = 6
tree_type = "binary"
T = Float64
nthread = Base.Threads.nthreads()
@info "testing with: $nobs observations | $num_feat features. nthread: $nthread | tree_type : $tree_type | max_depth : $max_depth"
seed!(123)
x_train = rand(T, nobs, num_feat)
y_train = rand(T, size(x_train, 1))

@info nthread
loss = "mse"
if loss == "mse"
loss_xgb = "reg:squarederror"
metric_xgb = "mae"
loss_evo = :mse
metric_evo = :mae
elseif loss == "logloss"
loss_xgb = "reg:logistic"
metric_xgb = "logloss"
loss_evo = :logloss
metric_evo = :logloss
end
# for device in ["cpu", "gpu"]
# for nobs in Int.([1e5, 1e6, 1e7])
# for nfeats in [10, 100]
# for max_depth in [6, 11]

@info "XGBoost"
@info "train"
params_xgb = Dict(
:num_round => nrounds,
:max_depth => max_depth - 1,
:eta => 0.05,
:objective => loss_xgb,
:print_every_n => 5,
:subsample => 0.5,
:colsample_bytree => 0.5,
:tree_method => "hist", # hist/gpu_hist
:max_bin => 64,
)

dtrain = DMatrix(x_train, y_train)
watchlist = Dict("train" => DMatrix(x_train, y_train));
@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...);
# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@info "predict"
@time pred_xgb = XGBoost.predict(m_xgb, x_train);
# @btime XGBoost.predict($m_xgb, $x_train);

# @info "lightgbm train:"
# m_gbm = LGBMRegression(
# objective = "regression",
# boosting = "gbdt",
# num_iterations = 200,
# learning_rate = 0.05,
# num_leaves = 256,
# max_depth = 5,
# tree_learner = "serial",
# num_threads = Sys.CPU_THREADS,
# histogram_pool_size = -1.,
# min_data_in_leaf = 1,
# min_sum_hessian_in_leaf = 0,
# max_delta_step = 0,
# min_gain_to_split = 0,
# feature_fraction = 0.5,
# feature_fraction_seed = 2,
# bagging_fraction = 0.5,
# bagging_freq = 1,
# bagging_seed = 3,
# max_bin = 64,
# bin_construct_sample_cnt = 200000,
# data_random_seed = 1,
# is_sparse = false,
# feature_pre_filter = false,
# is_unbalance = false,
# min_data_per_group = 1,
# metric = ["mae"],
# metric_freq = 10,
# # early_stopping_round = 10,
# )
# @time gbm_results = fit!(m_gbm, x_train, y_train, (x_train, y_train))
# @time pred_gbm = LightGBM.predict(m_gbm, x_train) |> vec

@info "EvoTrees"
verbosity = 1
params_evo = EvoTreeRegressor(;
loss=loss_evo,
nrounds=nrounds,
alpha=0.5,
lambda=0.0,
gamma=0.0,
eta=0.05,
max_depth=max_depth,
min_weight=1.0,
rowsample=0.5,
colsample=0.5,
nbins=64,
tree_type,
rng=123
)

@info "EvoTrees CPU"
device = "cpu"
# @info "init"
# @time m, cache = EvoTrees.init(params_evo, x_train, y_train);
# @time m, cache = EvoTrees.init(params_evo, x_train, y_train);
# @info "train - no eval"
# @time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
# @time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
@info "train - eval"
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
@info "predict"
@time pred_evo = m_evo(x_train);
@time pred_evo = m_evo(x_train);
# @btime m_evo($x_train);

@info "EvoTrees GPU"
df = DataFrame()
device = "gpu"
# @info "train - no eval"
# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
# CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
@info "train - eval"
CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
CUDA.@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);
# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo, device, verbosity);
@info "predict"
CUDA.@time pred_evo = m_evo(x_train; device);
CUDA.@time pred_evo = m_evo(x_train; device);
# @btime m_evo($x_train; device);
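# Sweep the benchmark grid for the currently selected device ("gpu" above).
# The same loop rerun with device = "cpu" writes regressor-cpu.csv instead.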
for device in [device]
for nobs in Int.([1e5, 1e6, 1e7])
for nfeats in [10, 100]
for max_depth in [6, 11]

# nobs = Int(1e6)
# nfeats = Int(100)
# max_depth = 6
max_nrounds = 200
tree_type = "binary"
T = Float64
nthreads = Base.Threads.nthreads()
@info "device: $device | nobs: $nobs | nfeats: $nfeats | max_depth : $max_depth | nthreads: $nthreads | tree_type : $tree_type"
seed!(123)
x_train = rand(T, nobs, nfeats)
y_train = rand(T, size(x_train, 1))

loss = "mse"
if loss == "mse"
loss_xgb = "reg:squarederror"
metric_xgb = "mae"
loss_evo = :mse
metric_evo = :mae
elseif loss == "logloss"
loss_xgb = "reg:logistic"
metric_xgb = "logloss"
loss_evo = :logloss
metric_evo = :logloss
end
tree_method = device == "gpu" ? "gpu_hist" : "hist"

@info "XGBoost"
params_xgb = Dict(
:num_round => max_nrounds,
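# note (assumption): EvoTrees' max_depth counts the leaf level, so the
# comparable XGBoost depth is one less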
:max_depth => max_depth - 1,
:eta => 0.05,
:objective => loss_xgb,
:print_every_n => 5,
:subsample => 0.5,
:colsample_bytree => 0.5,
:tree_method => tree_method, # "hist" on CPU, "gpu_hist" on GPU
:max_bin => 64,
)

@info "train"
dtrain = DMatrix(x_train, y_train)
watchlist = Dict("train" => dtrain) # reuse dtrain rather than building a second DMatrix
# m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
t_train_xgb = @elapsed m_xgb = xgboost(dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric=metric_xgb, params_xgb...)
# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthreads, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@info "predict"
# pred_xgb = XGBoost.predict(m_xgb, x_train)
# t_infer_xgb = @elapsed pred_xgb = XGBoost.predict(m_xgb, x_train)
t_infer_xgb = missing
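# XGBoost inference timing is left disabled here, which is why the
# infer_xgb column in regressor-gpu.csv is empty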
# @btime XGBoost.predict($m_xgb, $x_train);

# @info "lightgbm train:"
# m_gbm = LGBMRegression(
# objective = "regression",
# boosting = "gbdt",
# num_iterations = 200,
# learning_rate = 0.05,
# num_leaves = 256,
# max_depth = 5,
# tree_learner = "serial",
# num_threads = Sys.CPU_THREADS,
# histogram_pool_size = -1.,
# min_data_in_leaf = 1,
# min_sum_hessian_in_leaf = 0,
# max_delta_step = 0,
# min_gain_to_split = 0,
# feature_fraction = 0.5,
# feature_fraction_seed = 2,
# bagging_fraction = 0.5,
# bagging_freq = 1,
# bagging_seed = 3,
# max_bin = 64,
# bin_construct_sample_cnt = 200000,
# data_random_seed = 1,
# is_sparse = false,
# min_data_per_group = 1,
# metric = ["mae"],
# metric_freq = 10,
# # early_stopping_round = 10,
# )
# @time gbm_results = fit!(m_gbm, x_train, y_train, (x_train, y_train))
# @time pred_gbm = LightGBM.predict(m_gbm, x_train) |> vec

@info "EvoTrees"
verbosity = 1

params_evo = EvoTreeRegressor(;
loss=loss_evo,
nrounds=max_nrounds,
alpha=0.5,
lambda=0.0,
gamma=0.0,
eta=0.05,
max_depth=max_depth,
min_weight=1.0,
rowsample=0.5,
colsample=0.5,
nbins=64,
tree_type,
rng=123
)

@info "train - eval"
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
t_train_evo = @elapsed m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);

@info "predict"
@time pred_evo = m_evo(x_train; device)
t_infer_evo = @elapsed pred_evo = m_evo(x_train; device)

_df = DataFrame(
:device => device,
:nobs => nobs,
:nfeats => nfeats,
:max_depth => max_depth,
:train_evo => t_train_evo,
:train_xgb => t_train_xgb,
:infer_evo => t_infer_evo,
:infer_xgb => t_infer_xgb)
append!(df, _df)
end
end
end
end

path = joinpath(@__DIR__, "regressor-$device.csv")
CSV.write(path, df)
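
The script writes one file per device to the benchmarks folder. Assuming the project environment provides the packages imported at the top, a run might look like

julia --project --threads=auto benchmarks/regressor.jl

with the device = "gpu" assignment toggled to "cpu" to produce the companion regressor-cpu.csv.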
