Commit 31fd884
Full clean up of ACAT branch, pep8 formatting, deleted unused code
jgpavez committed Mar 19, 2016
1 parent abfc488 commit 31fd884
Showing 10 changed files with 2,422 additions and 2,853 deletions.
421 changes: 164 additions & 257 deletions DecomposingTest_10D.py

(Large diffs are not rendered by default.)

177 changes: 97 additions & 80 deletions DecomposingTest_1D.py
@@ -1,19 +1,16 @@
 #!/usr/bin/env python
 '''
-This python script can be used to reproduce the results on 1D distributions 
-on the article Experiments using machine learning to approximate likelihood ratios 
-for mixture models (ACAT 2016). 
+This python script can be used to reproduce the results on 1D distributions
+on the article Experiments using machine learning to approximate likelihood ratios
+for mixture models (ACAT 2016).
 '''
 
 __author__ = "Pavez J. <[email protected]>"
 
-
-
 import ROOT
 import numpy as np
 from sklearn import svm, linear_model
 from sklearn.externals import joblib
-from sklearn.metrics import roc_curve, auc
 from sklearn.ensemble import GradientBoostingClassifier
 
 import sys
@@ -23,9 +20,9 @@
 from mlp import MLPTrainer
 
 from make_data import makeData, makeModelND, makeModelPrivateND,\
-        makeModel
+    makeModel
 from utils import printMultiFrame, printFrame, saveFig, loadData,\
-        makeROC, makeSigBkg, makePlotName
+    makeROC, makeSigBkg, makePlotName
 
 from train_classifiers import trainClassifiers, predict
 from decomposed_test import DecomposedTest
@@ -34,75 +31,95 @@
 
 
 if __name__ == '__main__':
-    # Setting the classifier to use
-    model_g = None
-    classifiers = {'svc':svm.NuSVC(probability=True),'svr':svm.NuSVR(),
-        'logistic': linear_model.LogisticRegression(),
-        'bdt':GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
-        max_depth=5, random_state=0),
-        'mlp':MLPTrainer(n_hidden=4, L2_reg=0),
-        'xgboost': XGBoostClassifier(num_class=2, nthread=4, silent=1,
-        num_boost_round=100, eta=0.5, max_depth=4)}
-    clf = None
-    if (len(sys.argv) > 1):
-        model_g = sys.argv[1]
-        clf = classifiers.get(sys.argv[1])
-    if clf == None:
-        model_g = 'logistic'
-        clf = classifiers['logistic']
-        print 'Not found classifier, Using logistic instead'
-
-    # parameters of the mixture model
-    c0 = np.array([.0,.3, .7])
-    c1 = np.array([.1,.3, .7])
-    c1_g = ''
-
-    c0 = c0/c0.sum()
-    c1[0] = sys.argv[2]
-    if c1[0] < 0.01:
-        c1_g = "%.3f"%c1[0]
-    else:
-        c1_g = "%.2f"%c1[0]
-    c1[0] = (c1[0]*(c1[1]+c1[2]))/(1.-c1[0])
-    c1 = c1 / c1.sum()
-
-    verbose_printing = True
-    dir = '.'
-    workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root'
-
-    # features
-    vars_g = ['x']
-
-    ROOT.gROOT.SetBatch(ROOT.kTRUE)
-    ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15)
-
-    # Set this value to False if only final plots are needed
-    verbose_printing = True
-
-    if (len(sys.argv) > 3):
-        print 'Setting seed: {0} '.format(sys.argv[3])
-        ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3]))
-        np.random.seed(int(sys.argv[3]))
-
-    # Create models to sample from
-    makeModel(c0=c0,c1=c1,workspace=workspace_file,dir=dir,verbose_printing=
-              verbose_printing)
-
-    # make sintetic data to train the classifiers
-    makeData(vars_g=vars_g,c0=c0,c1=c1,num_train=100000,num_test=50000,
-             workspace=workspace_file,dir=dir, c1_g=c1_g, model_g='mlp')
-
-    # train the pairwise classifiers
-    trainClassifiers(clf,3,dir=dir, model_g=model_g,
-                     c1_g=c1_g ,model_file='adaptive')
-
-    # class which implement the decomposed method
-    test = DecomposedTest(c0,c1,dir=dir,c1_g=c1_g,model_g=model_g,
-        input_workspace=workspace_file, verbose_printing = verbose_printing,
-        dataset_names=['0','1','2'],clf=clf if model_g=='mlp' else None)
-
-    test.fit(data_file='test',importance_sampling=False, true_dist=True,vars_g=vars_g)
-    test.computeRatios(true_dist=True,vars_g=vars_g,use_log=False)
-
-
-
+    # Setting the classifier to use
+    model_g = None
+    classifiers = {
+        'svc': svm.NuSVC(
+            probability=True),
+        'svr': svm.NuSVR(),
+        'logistic': linear_model.LogisticRegression(),
+        'bdt': GradientBoostingClassifier(
+            n_estimators=100,
+            learning_rate=1.0,
+            max_depth=5,
+            random_state=0),
+        'mlp': MLPTrainer(
+            n_hidden=4,
+            L2_reg=0),
+        'xgboost': XGBoostClassifier(
+            num_class=2,
+            nthread=4,
+            silent=1,
+            num_boost_round=100,
+            eta=0.5,
+            max_depth=4)}
+    clf = None
+    if (len(sys.argv) > 1):
+        model_g = sys.argv[1]
+        clf = classifiers.get(sys.argv[1])
+    if clf is None:
+        model_g = 'logistic'
+        clf = classifiers['logistic']
+        print 'Not found classifier, Using logistic instead'
+
+    # parameters of the mixture model
+    c0 = np.array([.0, .3, .7])
+    c1 = np.array([.1, .3, .7])
+    c1_g = ''
+
+    c0 = c0 / c0.sum()
+    c1[0] = sys.argv[2]
+    if c1[0] < 0.01:
+        c1_g = "%.3f" % c1[0]
+    else:
+        c1_g = "%.2f" % c1[0]
+    c1[0] = (c1[0] * (c1[1] + c1[2])) / (1. - c1[0])
+    c1 = c1 / c1.sum()
+
+    verbose_printing = True
+    dir = '.'
+    workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root'
+
+    # features
+    vars_g = ['x']
+
+    ROOT.gROOT.SetBatch(ROOT.kTRUE)
+    ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15)
+
+    # Set this value to False if only final plots are needed
+    verbose_printing = True
+
+    if (len(sys.argv) > 3):
+        print 'Setting seed: {0} '.format(sys.argv[3])
+        ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3]))
+        np.random.seed(int(sys.argv[3]))
+
+    # Create models to sample from
+    makeModel(c0=c0,c1=c1,workspace=workspace_file,dir=dir,verbose_printing=
+              verbose_printing)
+
+    # make sintetic data to train the classifiers
+    makeData(vars_g=vars_g,c0=c0,c1=c1,num_train=100000,num_test=50000,
+             workspace=workspace_file,dir=dir, c1_g=c1_g, model_g='mlp')
+
+    # train the pairwise classifiers
+    trainClassifiers(clf,3,dir=dir, model_g=model_g,
+                     c1_g=c1_g ,model_file='adaptive')
+
+    # class which implement the decomposed method
+    test = DecomposedTest(
+        c0,
+        c1,
+        dir=dir,
+        c1_g=c1_g,
+        model_g=model_g,
+        input_workspace=workspace_file,
+        verbose_printing=verbose_printing,
+        dataset_names=[
+            '0',
+            '1',
+            '2'],
+        clf=clf if model_g == 'mlp' else None)
+
+    test.fit(data_file='test',true_dist=True,vars_g=vars_g)
+    test.computeRatios(true_dist=True, vars_g=vars_g, use_log=False)
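
The script is driven entirely by positional command-line arguments. The exact usage is not documented in this commit, so the following invocation is inferred from the argv handling above:

    python DecomposingTest_1D.py mlp 0.05 1234

The first argument picks a key from the classifiers dict (unknown names fall back to logistic regression), the second sets the target weight of the first mixture component, and the optional third argument seeds both ROOT's RooRandom generator and numpy.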
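The coefficient arithmetic is worth unpacking: c1[0] is first inflated by the factor (c1[1] + c1[2]) / (1 - c1[0]) precisely so that, after the final renormalization, the first component carries exactly the weight passed on the command line. A minimal standalone sketch of the same arithmetic, with 0.05 standing in for sys.argv[2]:

    import numpy as np

    c1 = np.array([.1, .3, .7])   # same default as the script
    target = 0.05                 # plays the role of float(sys.argv[2])

    # Inflate the first coefficient so that normalization lands it on `target`:
    # c1[0] <- target * (c1[1] + c1[2]) / (1 - target)
    c1[0] = (target * (c1[1] + c1[2])) / (1. - target)
    c1 = c1 / c1.sum()

    print(c1[0])  # 0.05 -- the requested weight survives the renormalization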

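For context, the decomposed method that DecomposedTest implements rewrites the likelihood ratio of two mixture models p0(x) = sum_i c0_i f_i(x) and p1(x) = sum_j c1_j f_j(x) as p0(x)/p1(x) = sum_i c0_i [sum_j c1_j f_j(x)/f_i(x)]^(-1), so only pairwise ratios f_j/f_i are needed, each approximated by one of the pairwise classifiers trained above. A minimal sketch, assuming a hypothetical pairwise_ratio(i, j, x) helper that approximates f_j(x)/f_i(x) (in the repository this role is played by the calibrated classifiers wrapped inside DecomposedTest):

    def decomposed_ratio(x, c0, c1, pairwise_ratio):
        # Composite ratio p0(x)/p1(x) assembled from pairwise ratios f_j/f_i.
        total = 0.0
        for i, c0_i in enumerate(c0):
            if c0_i == 0.0:
                continue  # zero-weight components (c0[0] here) drop out
            denom = sum(c1_j * pairwise_ratio(i, j, x)
                        for j, c1_j in enumerate(c1))
            total += c0_i / denom
        return total

The payoff of the decomposition is that each f_j/f_i is a ratio between single, fixed components, so the classifiers never need retraining when the mixture coefficients change.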