Skip to content

Commit

Permalink
Added a max_samples parameter to grid search for benchmarking purpose…
Browse files Browse the repository at this point in the history
…s, adjusted the grid search script accordingly
  • Loading branch information
AbhinavBehal committed Jun 2, 2019
1 parent efbc966 commit fc44944
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 13 deletions.
15 changes: 10 additions & 5 deletions benchmarking/grid_search.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@ temp_file="${current_folder}/.temp"

mkdir -p "${current_folder}/results" && touch "${results_file}"

# CSV header for the benchmark results file.
echo "Max_Samples,Time,Score,Configs_Evaluated" > "${results_file}"

# Benchmark grid search for max_samples = 1..5.
for i in 1 2 3 4 5; do
    echo "${i}"
    # -u: unbuffered so progress streams live through tee; tee also keeps a
    # copy of the run's output in ${temp_file} for the greps below.
    python -u "${current_folder}/../main.py" -a grid -p "{\"max_samples\": ${i}, \"cv\": 3}" | tee "${temp_file}"

    # Scrape the metrics printed by main.py out of the captured output.
    score=$(grep -oP "(?<=Best score: ).+" "${temp_file}")
    time=$(grep -oP "(?<=Took: )[\d\.]+" "${temp_file}")
    configs=$(grep -oP "\d+(?= candidates)" "${temp_file}")

    echo "${i},${time},${score},${configs}" >> "${results_file}"
done

rm -f "${temp_file}" &> /dev/null
25 changes: 21 additions & 4 deletions tuning/grid_search.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

from tuning import util


def run(data, max_samples, cv):
    """Run an exhaustive grid search over a randomly generated parameter grid.

    Parameters
    ----------
    data : dict
        Mapping with keys ``'X'`` (features) and ``'y'`` (labels) — assumed
        from the ``search.fit(data['X'], data['y'])`` call below.
    max_samples : int
        Inclusive upper bound on how many candidate values are drawn per
        hyper-parameter when building the grid (see ``_generate_grid``).
    cv : int
        Number of cross-validation folds passed to ``GridSearchCV``.

    Returns
    -------
    list
        ``[search.best_score_, search.best_params_]`` of the fitted search.
    """
    search = GridSearchCV(
        # n_estimators kept small (10) to keep benchmarking runs fast.
        estimator=xgb.XGBClassifier(n_jobs=-1, n_estimators=10),
        param_grid=_generate_grid(max_samples),
        scoring='roc_auc',
        n_jobs=-1,
        verbose=1,
        cv=cv
    )

    search.fit(data['X'], data['y'])

    return [search.best_score_, search.best_params_]


def _generate_grid(max_samples):
    """Build a random ``param_grid`` for ``GridSearchCV``.

    For each hyper-parameter in ``util.param_distributions``, draw between 1
    and ``max_samples`` candidate values: sampled without replacement from a
    discrete list, or via ``rvs()`` from a continuous distribution object.

    Parameters
    ----------
    max_samples : int
        Inclusive upper bound on the number of values drawn per parameter.
        Must be >= 1 (``np.random.randint(1, max_samples + 1)`` raises
        otherwise).

    Returns
    -------
    dict
        Mapping of parameter name -> list of candidate values.
    """
    generated_grid = {}

    for param, dist in util.param_distributions.items():
        num_samples = np.random.randint(1, max_samples + 1)

        if isinstance(dist, list):
            # Sample WITHOUT replacement so the grid holds no duplicate
            # values for this parameter — duplicates would make GridSearchCV
            # evaluate identical configurations more than once and inflate
            # the benchmark's candidate count. Cap the draw at the number of
            # available options so replace=False cannot raise.
            size = min(num_samples, len(dist))
            generated_grid[param] = list(
                np.random.choice(dist, size=size, replace=False))
        else:
            # Continuous distribution (presumably a scipy.stats frozen
            # distribution — it exposes rvs()): draw independent samples.
            generated_grid[param] = [dist.rvs() for _ in range(num_samples)]

    return generated_grid
2 changes: 1 addition & 1 deletion tuning/random_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def run(data, n_iter, cv):
search = RandomizedSearchCV(
estimator=xgb.XGBClassifier(n_jobs=-1),
estimator=xgb.XGBClassifier(n_jobs=-1, n_estimators=10),
param_distributions=util.param_distributions,
scoring='roc_auc',
n_iter=n_iter,
Expand Down
5 changes: 2 additions & 3 deletions tuning/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import random

import numpy as np
from scipy.stats import randint, uniform

param_distributions = {
Expand Down Expand Up @@ -28,7 +27,7 @@ def get_random_params():

for param, dist in param_distributions.items():
if isinstance(dist, list):
generated_config[param] = random.choice(dist)
generated_config[param] = np.random.choice(dist)
else:
generated_config[param] = dist.rvs()

Expand Down

0 comments on commit fc44944

Please sign in to comment.